From c80ea1d0c7b8cf6654f08076e81175691a180d3b Mon Sep 17 00:00:00 2001 From: osmarks Date: Thu, 8 Apr 2021 16:28:33 +0100 Subject: [PATCH] New things, documentation --- .gitignore | 1 + README.md | 44 ++-- calibre-indexer/Cargo.lock | 381 +++++++++++++++++++++++++++++++ calibre-indexer/Cargo.toml | 15 ++ calibre-indexer/qxml.rs | 231 +++++++++++++++++++ calibre-indexer/src/main.rs | 219 ++++++++++++++++++ calibre-indexer/src/run-query.py | 12 + code-guessing/a2.c | 44 ++++ code-guessing/anagram.c | 75 ++++++ fractalart-rs/.gitignore | 2 + fractalart-rs/Cargo.lock | 216 ++++++++++++++++++ fractalart-rs/Cargo.toml | 12 + fractalart-rs/README.md | 11 + fractalart-rs/src/main.rs | 140 ++++++++++++ 14 files changed, 1389 insertions(+), 14 deletions(-) create mode 100644 .gitignore create mode 100644 calibre-indexer/Cargo.lock create mode 100644 calibre-indexer/Cargo.toml create mode 100644 calibre-indexer/qxml.rs create mode 100644 calibre-indexer/src/main.rs create mode 100644 calibre-indexer/src/run-query.py create mode 100644 code-guessing/a2.c create mode 100644 code-guessing/anagram.c create mode 100644 fractalart-rs/.gitignore create mode 100644 fractalart-rs/Cargo.lock create mode 100644 fractalart-rs/Cargo.toml create mode 100644 fractalart-rs/README.md create mode 100644 fractalart-rs/src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f90bb6b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +out.wav \ No newline at end of file diff --git a/README.md b/README.md index 8fd69dc..988eef1 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,42 @@ # random-stuff -In the interest of transparency and/or being vaguely useful, I'm releasing random junk from my projects folder publicly. +In the interest of transparency and/or being vaguely useful, I'm releasing many random and/or deterministic small things accumulated from various projects folders over the years. 
This comes with absolutely no guarantee of support or correct function, although if you need some of this for something I will *try* and answer any queries you might have. ## Contents (incomplete and rough list) -* some interpreters for esolangs due to events on the esolangs discord -* bad assembly hello world for some reason +* some interpreters for various esolangs, written for competitions on an esolangs Discord server +* trivial x86-64 assembly hello world for some reason * political compass visualizer thing for one Discord server -* simple un-hexadecimal-izer program (`base64 -d` exists but somehow not `base16 -d` or something) -* scripts for packing music + metadata onto Computronics (a Minecraft mod) tape images -* some thing to generate WAV files containing beeping noises +* simple un-hexadecimal-izer program (`base64 -d` exists but somehow not `base16 -d` or some equivalent) +* `generate-tape-image.py` - scripts for packing music + metadata onto Computronics (a Minecraft mod) tape images - these require LionRay to do the DFPWM conversion as of now. Can be played with something like [this](https://pastebin.com/SPyr8jrh). +* a thing to generate WAV files containing beeping noises * fairly transferable small bits of an abandoned JS project -* an extremely bad and/or unfinished cookie clicker thing where you press the enter key instead of clicking +* an extremely bad cookie clicker-style incremental thing where you press the enter key instead of clicking * a very simple web API wrapper for `luamin` -* some bodged old version of [it-was-inevitable](https://github.com/BenLubar/it_was_inevitable) which runs a local webserver instead of sending to Mastodon -* an extremely cursed program which bruteforces regexes or something? +* a tweaked old version of [it-was-inevitable](https://github.com/BenLubar/it_was_inevitable) which runs a local webserver instead of sending to Mastodon. Used to be used by PotatOS but this was discontinued due to RAM use. 
+* an extremely accursed program which bruteforces regexes or something? Not that this actually does anything beyond using vast amounts of CPU and printing things. * `realtau.txt`, which seemingly contains 100000 digits of τ. I wonder if there's a faketau somewhere. -* some weird thing which lets you use synonyms to get attributes on python objects -* something which generates random vaguely human readable names in Elm. Please note that I do NOT endorse the use of Elm and this is provided mostly just to make the languages list at the side weirder. +* a strange thing which lets you use synonyms to get attributes on Python objects +* code for generating random vaguely human readable names in Elm. Please note that I do NOT endorse the use of Elm and this is provided mostly just to make the languages list at the side weirder rather than for any actual uses. * F# kerbal name generator & very basic stack calculator -* importer part of an unfinished wikipedia database dump viewer +* Wikipedia dump index indexer (I think some of this is just example code for an oddly specific crate which parses the dump XML) * `ptt.py` - Python-based systemwide push to talk (mutes and unmutes microphone via PulseAudio) with tray icon -* `list-sort.py` - Made for a competition, it sorts a list by making a somewhat weird Lispy language and implementing quicksort(ish) in it. -* `mcc.py` - a chat program. Unlike most chat programs, it runs over IPv6 multicast so you can talk to anyone on your LAN who also happens to have this program somehow. Very flaky, due to trying to autoguess a network interface to use and also limited testing, as well as quite barebones. \ No newline at end of file +* `code-guessing` - contains my entries, some test code, and build processes for my submissions to the Esolangs code guessing competition. There are also some things which never made it into an entry, such as my abuse of [Z3](https://github.com/Z3Prover/z3) to solve mazes (it's surprisingly effective). 
+ * `list-sort.py`, which sorts lists of integers by interpreting a simple Lispy language and doing a continuation-passing-style quicksort (to avoid stack issues; it supports tail call optimization so this is "efficient"). + * `maze2.py`, which does simple depth first search to solve a maze in a pleasantly compact format. + * `multiply_matrices.py`, which abuses many Python features and does matrix multiplication in an inefficient recursive way which *looks* like Strassen's algorithm but isn't. + * `anagram.c`, which detects whether strings are (case-insensitively, and ignoring spaces) anagrams by uppercasing them, sorting them, and removing spaces and comparing them for equality. It does this by dividing the string into 16-byte chunks which can fit into a 128-bit `xmm` register (this had to run on Sandy Bridge systems, which lack AVX2), uppercasing them using three vector instructions (via the invariant that the input won't contain anything but `[A-Za-z ]`), applying a SIMD-based bubblesort to each chunk which swaps all the necessary pairs at once until it stops changing, and then using a 32-way (sequential; no idea how to parallelize this) merge to output a sorted string and discard spaces. These can then be checked for equality. +* `mcc.py` - a chat program. Unlike most chat programs, it runs over IPv6 multicast so you can talk to anyone on your LAN who also happens to have this program somehow. Very flaky, due to trying to autoguess a network interface to use and also limited testing, as well as quite barebones. +* `tiscubed.py` - an esolang somewhat like TIS-100, but with a somewhat exotic (almost no immediate operands, no registers, only 256B of memory per node) binary machine code format instead of assembly (there is an assembler available too). It's called "cubed" due to three dimensions, but I haven't actually done this yet. +* `iterated-prisoners-dilemma` - some scripts from an iterated prisoners' dilemma competition. 
Unfortunately, nobody came up with any particularly exciting algorithms for this. +* `calibre-indexer` - full text search for Calibre libraries, via SQLite. + * SQLite may not have been a great choice for this, as it cannot do concurrent writes. Nevertheless, the code works, if not particularly efficiently, and allows you to build a full text table (using [FTS5](https://sqlite.org/fts5.html)) to rapidly search in your Calibre library. + * While searches are near-instant, building the index is very slow (several deciseconds per book) and it takes up large amounts of disk space (though less than the original books, funnily, because those contain images). It's smart enough to not operate again on books it already has which haven't been changed, though. + * It only works on EPUBs, because I couldn't be bothered to support other formats (calibre can convert them anyway). + * Text (and chapter titles, ish) is extracted using a simple but seemingly fairly reliable state machine and `xml-rs`. + * I have not gotten round to releasing a nice-to-use frontend for this. You can use `run-query.py` for a less nice one. +* `length_terminated_strings.c` - a revolution in computer science, combining the efficient `strlen` of null-terminated strings with the... inclusion of length? of length-prefixed/fat-pointer strings. A length-terminated string has its length at the *end*, occupying 1 to 8 bytes. To find its length, simply traverse the string until the data at the end matches the length traversed so far. Yes, this implementation might slightly have a bit of undefined behaviour. +* `discord-message-dump.py`, which reads a GDPR data dump from Discord and copies all the messages in public channels to a CSV file. I used this to train a GPT-2 instance on my messages (available on request). 
+* `spudnet-http.py` - connect to the SPUDNET backend underlying [PotatOS](https://git.osmarks.net/osmarks/potatOS/)'s ~~backdoors~~ remote debugging system via the convenient new HTTP long-polling-based API. +* `fractalart-rs` - [this](https://github.com/TomSmeets/FractalArt/) in Rust and faster, see its own README for more details. \ No newline at end of file diff --git a/calibre-indexer/Cargo.lock b/calibre-indexer/Cargo.lock new file mode 100644 index 0000000..82441f1 --- /dev/null +++ b/calibre-indexer/Cargo.lock @@ -0,0 +1,381 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + +[[package]] +name = "ahash" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e" + +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cddc5f91628367664cc7c69714ff08deee8a3efc54623011c772544d7b2767" + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "bzip2" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"42b7c3cbf0fa9c1b82308d57191728ca0256cb821220f4e2fd410a72ade26e3b" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.10+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17fa3d1ac1ca21c5c4e36a97f3c3eb25084576f6fc47bf0139c1123434216c6c" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "calibre-indexer" +version = "0.1.0" +dependencies = [ + "anyhow", + "epub", + "lazy_static", + "num_cpus", + "rusqlite", + "xml-rs", +] + +[[package]] +name = "cc" +version = "1.0.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd" + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crc32fast" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "epub" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4086fc0bc91524e0a88bc13fa622e3b9fce38d5a91454e0667db97a4f39dc3" +dependencies = [ + "anyhow", + "percent-encoding", + "regex", + "xml-rs", + "zip", +] + +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "flate2" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cfff41391129e0a856d6d822600b8d71179d46879e310417eb9c762eb178b42" +dependencies = [ + "cfg-if 0.1.10", + "crc32fast", + "libc", + "miniz_oxide", +] + +[[package]] +name = "hashbrown" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashlink" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d99cf782f0dc4372d26846bec3de7804ceb5df083c2d4462c0b8d2330e894fa8" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "hermit-abi" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" +dependencies = [ + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8916b1f6ca17130ec6568feccee27c156ad12037880833a3b842a823236502e7" + +[[package]] +name = "libsqlite3-sys" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64d31059f22935e6c31830db5249ba2b7ecd54fd73a9909286f0a67aa55c2fbd" +dependencies = [ + "pkg-config", + "vcpkg", +] + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "miniz_oxide" +version = "0.3.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "791daaae1ed6889560f8c4359194f56648355540573244a5448a83ba1ecc7435" +dependencies = [ + "adler32", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "pkg-config" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548" + +[[package]] +name = "rusqlite" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38ee71cbab2c827ec0ac24e76f82eca723cee92c509a65f67dee393c25112" +dependencies = [ + "bitflags", + 
"fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "memchr", + "smallvec", +] + +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + +[[package]] +name = "syn" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1d708c221c5a612956ef9f75b37e454e88d1f7b899fbd3a18d4252012d663" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "thiserror" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi", + "winapi", +] + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "vcpkg" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b00bca6106a5e23f3eee943593759b7fcddb00554332e856d990c893966879fb" + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "winapi" +version = "0.3.9" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "xml-rs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a" + +[[package]] +name = "zip" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8264fcea9b7a036a4a5103d7153e988dbc2ebbafb34f68a3c2d404b6b82d74b6" +dependencies = [ + "byteorder", + "bzip2", + "crc32fast", + "flate2", + "thiserror", + "time", +] diff --git a/calibre-indexer/Cargo.toml b/calibre-indexer/Cargo.toml new file mode 100644 index 0000000..4acc319 --- /dev/null +++ b/calibre-indexer/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "calibre-indexer" +version = "0.1.0" +authors = ["osmarks "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +rusqlite = "0.24" +anyhow = "1" +num_cpus = "1" +epub = "1" +xml-rs = "0.8" # TODO: faster XML parser +lazy_static = "1" \ No newline at end of file diff --git a/calibre-indexer/qxml.rs b/calibre-indexer/qxml.rs new file mode 100644 index 0000000..84977b7 --- /dev/null +++ b/calibre-indexer/qxml.rs @@ -0,0 +1,231 @@ +// Earlier version attempting to use quick-xml +// Dropped because SQLite appears to be what most of the time is spent in anyway, 
and because quick-xml had some issues wrt. escaping + +use std::fs; +use anyhow::{Result, Context}; +use crossbeam::channel::{bounded}; +use crossbeam::thread; +use std::path::PathBuf; +use rusqlite::{params, Connection}; +use std::fs::File; +use xml::reader::{EventReader, XmlEvent, ParserConfig}; +use quick_xml::{Reader, events::Event}; +use std::io::BufReader; +use epub::doc::EpubDoc; +use lazy_static::lazy_static; +use std::collections::HashMap; + +#[derive(Debug, Clone)] +struct BookMeta { + title: String, + author: String, + description: String +} + +#[derive(Debug, Clone, Copy, PartialEq)] +enum XMLReadState { + None, + ReadingTitle, + ReadingAuthor, + ReadingDescription +} + +lazy_static! { // entity name -> UTF-8 bytes of the character that entity names + static ref ESCAPES: HashMap<Vec<u8>, Vec<u8>> = { + let mut m = HashMap::new(); + m.insert(b"nbsp".to_vec(), b"\xc2\xa0".to_vec()); + m.insert(b"copy".to_vec(), b"\xc2\xa9".to_vec()); + m.insert(b"eacute".to_vec(), b"\xc3\xa9".to_vec()); // U+00E9 (lowercase e-acute); \xc3\x89 would be uppercase U+00C9 + m.insert(b"shy".to_vec(), b"\xc2\xad".to_vec()); + m.insert(b"iuml".to_vec(), b"\xc3\xaf".to_vec()); // U+00EF (lowercase i-diaeresis); \xc3\x8f would be uppercase U+00CF + m + }; +} + +// Extract text from an XHTML page in an ebook +// Ignores