misc python

impractical B-M test
2024-02-25 14:26:53 +00:00 · 2024-02-12 10:56:27 +00:00
5 changed files with 364 additions and 8 deletions
--- a/alexandergriffing_spite.py
+++ b/alexandergriffing_spite.py
@ -0,0 +1,47 @@
+from decimal import Decimal
+import itertools
+
+def sum_series(series):
+    x = Decimal(0)
+    i = 0
+    t = 0
+    l = 0
+    # ethical
+    for value in series:
+        last_x = x
+        x += value
+        if x == last_x:
+            l += 1
+        else:
+            t = i
+            l = 0
+        if l > (i // 2 + 2): return x
+        i += 1
+
+def power_series(x, coefs):
+    def f():
+        a = Decimal(1)
+        for coef in coefs:
+            yield a * coef
+            a *= x
+    return sum_series(f())
+
+def derivative(power_series):
+    next(power_series)
+    for value, i in zip(power_series, itertools.count(1)):
+        yield i * value
+
+def cos():
+    for i in itertools.count(0):
+        yield Decimal(-1)**i / Decimal(math.factorial(i * 2))
+        yield 0
+
+def newton(func, x0):
+    x = x0
+    while True:
+        fx, fprimex = power_series(x, func()), power_series(x, derivative(func()))
+        x = x - fx / fprimex
+        yield x
+
+# Run Newton's method on the cosine series starting from 1 and print twice
+# each iterate. The newton() generator never ends, so this loop runs until
+# it is interrupted or an exception is raised.
+for half_pi in newton(cos, 1):
+    print(half_pi * 2)
--- a/lesswrong_to_tiktok.py
+++ b/lesswrong_to_tiktok.py
@ -0,0 +1,93 @@
+import wave, sys
+import nltk
+from balacoon_tts import TTS
+from collections import namedtuple
+import struct
+from PIL import Image, ImageDraw
+import cv2
+import numpy
+import subprocess
+
+# Side length in pixels of the rendered text image and of the video frames
+# (both are created as WIDTH x WIDTH).
+WIDTH = 400
+
+# Intermediate WAV file written by this script.
+AUDIO = "/tmp/x.wav"
+# Intermediate video of text frames written by this script.
+VIDEO = "/tmp/x.avi"
+# Existing video passed to ffmpeg as its first input.
+BACKDROP = "/tmp/x.mkv"
+# Final file produced by ffmpeg.
+OUTPUT = "/tmp/x.mp4"
+
+def render_text(text: str):
+    render_params = {"font_size": 24}
+    im  = Image.new("RGBA", (WIDTH, WIDTH))
+    draw = ImageDraw.Draw(im)
+    loc = [0, 0]
+    toks = text.split()
+    toks.reverse()
+    text_commands = []
+    while toks:
+        chunk = []
+        while draw.textbbox(loc, " ".join(chunk), **render_params)[2] < WIDTH:
+            if not toks: break
+            chunk.append(toks.pop())
+        else: toks.append(chunk.pop())
+        bbox = draw.textbbox(loc, " ".join(chunk), **render_params)
+        text_commands.append((tuple(loc), " ".join(chunk)))
+        loc[1] = bbox[3]
+    draw.rectangle([0, 0, WIDTH, loc[1]], fill="white")
+    for loc, text in text_commands:
+        draw.text(loc, text, fill="black", **render_params)
+    return im
+
+Pause = namedtuple("Pause", ["length"])
+
+text = open("/home/osmarks/Downloads/seq1.txt").read()
+
+tts = TTS("/home/osmarks/Downloads/en_us_hifi_jets_cpu.addon")
+#supported_speakers = tts.get_speakers()
+speaker = "6670"
+
+def chunks(text: str) -> list[str | Pause]:
+    out = []
+    for line in text.splitlines():
+        if line:
+            for sent in nltk.sent_tokenize(line):
+                out.append(sent)
+                out.append(Pause(0.5))
+            out.append(Pause(1))
+    return out
+
+# Audio sampling rate as reported by the TTS engine; also used for the WAV
+# header and for sizing pauses.
+RATE = tts.get_sampling_rate()
+# Frame rate of the generated text video.
+FPS = 30
+
+def wavblank(seconds):
+    return struct.pack(">h", 0) * round(seconds * RATE / 2)
+
+# Frame shown during pauses: the text renderer applied to an empty string.
+blank_frame = render_text("")
+
+# Motion-JPEG writer for the WIDTH x WIDTH text frames.
+fourcc = cv2.VideoWriter_fourcc(*"MJPG")
+video_writer = cv2.VideoWriter(VIDEO, fourcc, FPS, (WIDTH, WIDTH))
+# Running total, in seconds, of the durations computed below; used to trim
+# the final ffmpeg output.
+total_dur = 0
+with wave.open(AUDIO, "w") as fp:
+    # Mono, 2 bytes per sample, RATE Hz, uncompressed.
+    fp.setparams((1, 2, RATE, 0, "NONE", "NONE"))
+    for chunk in chunks(text):
+        # Sentences are synthesized and rendered; pauses produce silence and
+        # the blank frame.
+        if isinstance(chunk, str):
+            samples = tts.synthesize(chunk, speaker)
+            image = render_text(chunk)
+        elif isinstance(chunk, Pause):
+            samples = wavblank(chunk.length)
+            image = blank_frame
+        fp.writeframes(samples)
+        # NOTE(review): len() counts elements of whatever tts.synthesize
+        # returns but counts bytes for wavblank's output; if synthesize
+        # returns one element per 16-bit sample, pause audio is half as long
+        # as the duration computed here - confirm.
+        duration = len(samples) / RATE # what
+        total_dur += duration
+        frame = cv2.cvtColor(numpy.array(image), cv2.COLOR_RGBA2BGR)
+        # Repeat the same frame for the chunk's duration. The frame count is
+        # rounded per chunk, so video and audio lengths can drift slightly.
+        for _ in range(round(duration * FPS)): video_writer.write(frame)
+        print(chunk, duration)
+video_writer.release()
+# Overlay the text video on the backdrop at (200, 200), use the generated
+# audio, cut the result at total_dur seconds and encode with VAAPI H.264.
+# Raises CalledProcessError if ffmpeg exits with a non-zero status.
+subprocess.run([
+    "ffmpeg",
+    "-i", BACKDROP, "-i", AUDIO, "-i", VIDEO,
+    "-filter_complex", "overlay=x=200:y=200,format=nv12,hwupload",
+    "-to", str(total_dur),
+    "-y",
+    "-vaapi_device", "/dev/dri/renderD128", "-c:v", "h264_vaapi",
+    OUTPUT
+]).check_returncode()
--- a/memeticize.py
+++ b/memeticize.py
@ -1,6 +1,6 @@
 import os, sys, subprocess, datetime

-dt_threshold = datetime.datetime(2022, 11, 27).timestamp()
+dt_threshold = datetime.datetime(2023, 6, 16).timestamp()

 _, indir, outdir = sys.argv
 for x in os.listdir(indir):
@ -19,4 +19,4 @@ for x in os.listdir(indir):
                        os.rename(inpath, newpath)
                        break
            else:
-                print("keeping")
+                print("keeping")
--- a/rng_trainer.html
+++ b/rng_trainer.html
@ -29,8 +29,163 @@
 </div>
 <div id="seq"></div>
 <script>
+    // Throw an Error with the given message; usable where an expression is
+    // required (e.g. inside a conditional expression).
+    const error = e => { throw new Error(e) }
+
+    // Field operations over ordinary JavaScript numbers. Every "field"
+    // object in this script exposes the same shape: mul, add, neg, inv,
+    // zero and unity.
+    const floatField = {
+        mul: (a, b) => a * b,
+        add: (a, b) => a + b,
+        neg: a => -a,
+        inv: a => 1 / a,
+        zero: 0,
+        unity: 1
+    }
+    // Arithmetic modulo 2 on the values 0 and 1: addition is taken mod 2,
+    // negation is the identity, and only 1 has an inverse (inv throws
+    // "not invertible" for anything else).
+    const gf2 = {
+        mul: (a, b) => a * b,
+        add: (a, b) => (a + b) % 2,
+        neg: a => a,
+        inv: a => a == 1 ? 1 : error("not invertible"),
+        zero: 0,
+        unity: 1
+    }
+
+    const evalPoly = (poly, x, field) => {
+        let a = field.zero
+        let b = field.unity
+        for (const coef of poly) {
+            a = field.add(field.mul(b, coef))
+            b = field.mul(b, x)
+        }
+        return a
+    }
+    const arrayOf = (n, x) => new Array(n).fill(x)
+
+    const xPowN = (n, field) => arrayOf(n, field.zero).concat([field.unity])
+    const polyField = field => {
+        const unity = [field.unity]
+        const zero = []
+        const add = (a, b) => {
+            const [ap, bp] = a.length > b.length ? [a, b] : [b, a]
+            return ap.map((aix, ix) => field.add(aix, bp[ix] ?? field.zero))
+        }
+        const mul = (a, b) => {
+            const out = arrayOf(a.length + b.length - 1, field.zero)
+            for (let i = 0; i < a.length; i++) {
+                for (let j = 0; j < b.length; j++) {
+                    out[i + j] = field.add(out[i + j], field.mul(a[i], b[j]))
+                }
+            }
+            return out
+        }
+        const neg = a => a.map(field.neg)
+        return {
+            add,
+            mul,
+            neg,
+            unity,
+            zero,
+            inv: () => error("unimplemented")
+        }
+    }
+
+    // blatantly copied from Wikipedia https://en.wikipedia.org/wiki/Berlekamp%E2%80%93Massey_algorithm#Pseudocode
+    // Run the Berlekamp-Massey procedure over `sequence` using the
+    // arithmetic in `field`, and return the polynomial C as an array of
+    // coefficients. C[i] is the coefficient that multiplies sequence[n - i]
+    // when the discrepancy is computed below.
+    const berlekampMassey = (sequence, field) => {
+        const polys = polyField(field)
+        const N = sequence.length
+        // C is the current polynomial; B is the copy of C saved at the last
+        // update of L.
+        let C = polys.unity
+        let B = polys.unity
+
+        // L bounds how many earlier terms enter the discrepancy sum;
+        // m counts iterations since B was last replaced.
+        let L = 0;
+        let m = 1;
+        
+        // b is the discrepancy recorded when B was last replaced.
+        let b = field.unity;
+
+        for (let n = 0; n < N; n++) {
+            // d: discrepancy between sequence[n] and what C predicts from
+            // the previous L terms.
+            let d = sequence[n]
+            for (let i = 1; i <= L; i++) {
+                d = field.add(d, field.mul(C[i], sequence[n - i]))
+            }
+            if (d == field.zero) {
+                // C already accounts for this term.
+                m += 1
+            } else if (2 * L <= n) {
+                // Correct C by (d / b) * x^m * B and grow L; remember the
+                // pre-update C as the new B.
+                const T = C
+                C = polys.add(C, polys.neg(polys.mul(polys.mul([field.mul(d, field.inv(b))], xPowN(m, field)), B)))
+                L = n + 1 - L
+                B = T
+                b = d
+                m = 1
+            } else {
+                // Same correction to C, but L, B and b stay as they are.
+                C = polys.add(C, polys.neg(polys.mul(polys.mul([field.mul(d, field.inv(b))], xPowN(m, field)), B)))
+                m += 1
+            }
+        }
+        return C
+    }
+
+    const polyToKey = p => p.join("")
+    const polyRecurrence = (polynomial, sequence) => gf2.mul(gf2.neg(gf2.inv(polynomial[0])), polynomial.slice(1).map((coef, ix) => gf2.mul(coef, sequence[sequence.length - 1 - ix])).reduce(gf2.add, gf2.zero))
+
+    // Predict the next bit of `sequence` with a weighted vote over
+    // polynomials produced by berlekampMassey on windows of the sequence.
+    // Returns a boolean (weighted prediction > 0.5), or 0 when there are no
+    // polynomials to vote (total weight is 0).
+    const bmEnsemble = sequence => {
+        const seqlen = 10
+        // key (joined coefficients) -> [hits, attempts, polynomial]
+        const polys = new Map()
+        // Fit one polynomial per window of up to `seqlen` elements starting
+        // at each index; identical polynomials collapse onto one entry,
+        // which starts with the score [1, 2].
+        for (let i = 0; i < sequence.length; i++) {
+            const result = berlekampMassey(sequence.slice(i, i + seqlen), gf2)
+            polys.set(polyToKey(result), [1, 2, result])
+        }
+        // Score every polynomial on how well it predicts sequence[i] from
+        // the prefix before it, whenever the prefix is long enough.
+        // NOTE(review): i stops at sequence.length - 2, so the final element
+        // is never used for scoring - confirm this is intended.
+        for (let i = 0; i < sequence.length - 1; i++) {
+            const chunk = sequence.slice(0, i)
+            for (const [polystr, score] of polys.entries()) {
+                const poly = score[2]
+                if (chunk.length >= poly.length - 1) {
+                    const prediction = polyRecurrence(poly, chunk)
+                    if (prediction == sequence[i]) {
+                        score[0] += 1
+                    }
+                    score[1] += 1
+                }
+            }
+        }
+        // `max` accumulates the total weight, `pred` the weight of the
+        // polynomials voting 1.
+        let max = 0
+        let pred = 0
+        for (const [polystr, score] of polys.entries()) {
+            // Weight is 2^(hits - attempts - key length): misses and longer
+            // keys both reduce it.
+            const bits = score[0] - score[1] - polystr.length
+            //console.log(polystr, bits)
+            const weight = 2**bits
+            max += weight
+            pred += weight * polyRecurrence(score[2], sequence)
+        }
+        // Logged before the zero check, so this prints NaN when max is 0.
+        console.log("BM", pred / max)
+        return max > 0 ? pred / max > 0.5 : 0
+    }
+
+    const aaronsonPredictor = sequence => {
+        let k = 4
+        const m = new Map()
+        for (let i = 0; i < sequence.length - 1; i++) {
+            const slic = polyToKey(sequence.slice(Math.max(i - k + 1, 0), i + 1))
+            if (!m.get(slic)) m.set(slic, [0, 0])
+            const score = m.get(slic)
+            score[1] += 1
+            score[0] += sequence[i + 1]
+        }
+        var res
+        while (k) {
+            const slic = polyToKey(sequence.slice(-k))
+            if (res = m.get(slic)) {
+                const prob = res[0] / res[1]
+                console.log("AO", prob, slic)
+                return prob > 0.5
+            }
+            k -= 1
+        }
+        return 0
+    }
+
+    // Number of correct predictions made so far by each predictor.
+    let correct = {
+        "aaronson": 0,
+        "bm": 0
+    }
    var working = true
-    const FINALSEQLEN = 100
+    const FINALSEQLEN = 300
    const tests = {
        "RNG1": []
    }
@ -39,9 +194,18 @@
        if (working) {
            seq.push(val)
            qty.innerText = `${seq.length}/${FINALSEQLEN}`
+            if (seq.length > 0) {
+                correct.bm += bmEnsemble(seq.slice(0, seq.length - 1)) == seq[seq.length - 1] ? 1 : 0
+                correct.aaronson += aaronsonPredictor(seq.slice(0, seq.length - 1)) == seq[seq.length - 1] ? 1 : 0
+            }
            if (seq.length === FINALSEQLEN) {
                working = false
-                qty.innerText = "Done"
+                let accuracy = ""
+                for (const [name, count] of Object.entries(correct)) {
+                    accuracy += `; ${name} ${count / FINALSEQLEN * 100}%`
+                }
+                qty.innerText = `Done${accuracy}`
+                console.log(correct)
            }
        }
    }
@ -49,13 +213,13 @@
        working = true
        seq = []
    }
-    l.onclick = () => push("L")
-    r.onclick = () => push("R")
+    l.onclick = () => push(0)
+    r.onclick = () => push(1)
    window.onkeypress = ev => {
        if (ev.key.toLowerCase() == "l" || ev.key == "1") {
-            push("L")
+            push(0)
        } else if (ev.key.toLowerCase() == "r" || ev.key == "2") {
-            push("R")
+            push(1)
        }
    }
 </script>
--- a/rotating_audio.py
+++ b/rotating_audio.py
@ -0,0 +1,52 @@
+import pathlib
+import numpy as np
+import wave
+import math
+import tempfile
+import subprocess
+import contextlib
+
+# Number of full rotations (multiples of 2*pi) the phase sweeps across the
+# nsamples output samples.
+cyc = 2
+# Frames read from each input; at the 48000 Hz rate asserted on load this is
+# 60 seconds.
+nsamples = 48000 * 60
+# Directory whose entries are all treated as input audio files.
+path = pathlib.Path("~/Downloads/inputs").expanduser()
+
+inputs = []
+tempfiles = []
+with contextlib.ExitStack() as stack:
+    for file in path.glob("*"):
+        mgr = tempfile.NamedTemporaryFile("w")
+        stack.enter_context(mgr)
+        tempfiles.append(mgr)
+        subprocess.run(["ffmpeg", "-i", file, "-ar", "48000", "-ac", "1", "-filter:a", "dynaudnorm=p=1.0:s=5:g=15", "-y", "-f", "wav", mgr.name])
+
+    for t in tempfiles:
+        with wave.open(str(t.name), "r") as w:
+            assert w.getsampwidth() == 2 and w.getnchannels() == 1 and w.getframerate() == 48000
+            inputs.append(np.frombuffer(w.readframes(nsamples), np.dtype(np.int16).newbyteorder("<")).astype(float) / 32768)
+
+# Stack the inputs and scale each by 1/len(inputs).
+# NOTE(review): this presumably requires every input to supply exactly
+# nsamples frames so the rows line up with `phase` - confirm.
+inputs = np.array(inputs) * (1/len(inputs)) # wrong but close enough
+# One starting angle per input, evenly spaced over [0, 2*pi), repeated along
+# the sample axis: shape (len(inputs), nsamples).
+offsets = np.broadcast_to(np.linspace(0, math.pi * 2, num=len(inputs) + 1)[:len(inputs)], (nsamples, len(inputs))).transpose()
+# Angle of each input at each sample: a linear sweep through `cyc` full
+# turns plus that input's starting offset.
+phase = np.broadcast_to(np.linspace(0, math.pi * 2 * cyc, num=nsamples), (len(inputs), nsamples)) + offsets
+empty = np.zeros_like(phase)
+# Cosine and sine of the angle, used below as per-channel gains.
+x = np.cos(phase)
+y = np.sin(phase)
+
+def writeaudio(file, array):
+    wr = wave.open(file, "w")
+    wr.setnchannels(1)
+    wr.setsampwidth(2)
+    wr.setframerate(48000)
+    wr.writeframes((array * 32768).astype("<i2").tobytes())
+    wr.close()
+
+with tempfile.NamedTemporaryFile("wb") as left:
+    with tempfile.NamedTemporaryFile("wb") as front:
+        with tempfile.NamedTemporaryFile("wb") as right:
+            with tempfile.NamedTemporaryFile("wb") as back:
+                writeaudio(left, sum(np.where(-x > 0, -x, empty) * inputs))
+                writeaudio(right, sum(np.where(x > 0, x, empty) * inputs))
+                writeaudio(front, sum(np.where(y > 0, y, empty) * inputs))
+                writeaudio(back, sum(np.where(-y > 0, -y, empty) * inputs))
+                print(front.name)
+                # layout is technically front left + front right + back left + back right - ignore
+                subprocess.run(["ffmpeg", "-i", front.name, "-i", right.name, "-i", back.name, "-i", left.name, "-filter_complex", "[0:a][1:a]join=inputs=4:channel_layout=quad[a]", "-map", "[a]", "/tmp/out.opus"]).check_returncode()
Author	SHA1	Message	Date
osmarks	bb27d89936	misc python	2024-02-25 14:26:53 +00:00
osmarks	c6a66dbb8b	impractical B-M test	2024-02-12 10:56:27 +00:00