diff --git a/.gitignore b/.gitignore
index a0caf95..536110d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,7 @@ meme-rater/images/
meme-rater/meta/
meme-rater/*.sqlite3*
meme-rater/deploy_for_training.sh
-node_modules/*
\ No newline at end of file
+node_modules/*
+node_modules
+*sqlite3*
+thumbtemp
\ No newline at end of file
diff --git a/.sqlx/query-0d5b91c01acf72be0cd78f1a0c58c417e06d7c4e53e1ec542243ccf2808bbab7.json b/.sqlx/query-0d5b91c01acf72be0cd78f1a0c58c417e06d7c4e53e1ec542243ccf2808bbab7.json
new file mode 100644
index 0000000..69b49d9
--- /dev/null
+++ b/.sqlx/query-0d5b91c01acf72be0cd78f1a0c58c417e06d7c4e53e1ec542243ccf2808bbab7.json
@@ -0,0 +1,12 @@
+{
+ "db_name": "SQLite",
+ "query": "INSERT OR IGNORE INTO files (filename) VALUES (?)",
+ "describe": {
+ "columns": [],
+ "parameters": {
+ "Right": 1
+ },
+ "nullable": []
+ },
+ "hash": "0d5b91c01acf72be0cd78f1a0c58c417e06d7c4e53e1ec542243ccf2808bbab7"
+}
diff --git a/.sqlx/query-63edaa9692deb1a9fb17d9e16905a299878bc0fe4af582c6791a411741ee41d9.json b/.sqlx/query-63edaa9692deb1a9fb17d9e16905a299878bc0fe4af582c6791a411741ee41d9.json
new file mode 100644
index 0000000..f6da11d
--- /dev/null
+++ b/.sqlx/query-63edaa9692deb1a9fb17d9e16905a299878bc0fe4af582c6791a411741ee41d9.json
@@ -0,0 +1,12 @@
+{
+ "db_name": "SQLite",
+ "query": "UPDATE files SET ocr = ?, raw_ocr_segments = ?, ocr_time = ? WHERE filename = ?",
+ "describe": {
+ "columns": [],
+ "parameters": {
+ "Right": 4
+ },
+ "nullable": []
+ },
+ "hash": "63edaa9692deb1a9fb17d9e16905a299878bc0fe4af582c6791a411741ee41d9"
+}
diff --git a/.sqlx/query-b6803e2443445de725290dde85de5b5ef87958bec4d6db6bd06660b71f7a1ad0.json b/.sqlx/query-b6803e2443445de725290dde85de5b5ef87958bec4d6db6bd06660b71f7a1ad0.json
new file mode 100644
index 0000000..208e746
--- /dev/null
+++ b/.sqlx/query-b6803e2443445de725290dde85de5b5ef87958bec4d6db6bd06660b71f7a1ad0.json
@@ -0,0 +1,12 @@
+{
+ "db_name": "SQLite",
+ "query": "UPDATE files SET thumbnails = ?, thumbnail_time = ? WHERE filename = ?",
+ "describe": {
+ "columns": [],
+ "parameters": {
+ "Right": 3
+ },
+ "nullable": []
+ },
+ "hash": "b6803e2443445de725290dde85de5b5ef87958bec4d6db6bd06660b71f7a1ad0"
+}
diff --git a/.sqlx/query-bed71d48c691bff7464b1aa767162df98ee2fcbe8df11f7db18a3647b2e0f1a2.json b/.sqlx/query-bed71d48c691bff7464b1aa767162df98ee2fcbe8df11f7db18a3647b2e0f1a2.json
new file mode 100644
index 0000000..d91e95a
--- /dev/null
+++ b/.sqlx/query-bed71d48c691bff7464b1aa767162df98ee2fcbe8df11f7db18a3647b2e0f1a2.json
@@ -0,0 +1,12 @@
+{
+ "db_name": "SQLite",
+ "query": "UPDATE files SET embedding_time = ?, embedding = ? WHERE filename = ?",
+ "describe": {
+ "columns": [],
+ "parameters": {
+ "Right": 3
+ },
+ "nullable": []
+ },
+ "hash": "bed71d48c691bff7464b1aa767162df98ee2fcbe8df11f7db18a3647b2e0f1a2"
+}
diff --git a/.sqlx/query-ec2da4ab11ede7a9a468ff3a50c55e0f6503fddd369f2c3031f39c0759bb97a0.json b/.sqlx/query-ec2da4ab11ede7a9a468ff3a50c55e0f6503fddd369f2c3031f39c0759bb97a0.json
new file mode 100644
index 0000000..b362e6b
--- /dev/null
+++ b/.sqlx/query-ec2da4ab11ede7a9a468ff3a50c55e0f6503fddd369f2c3031f39c0759bb97a0.json
@@ -0,0 +1,62 @@
+{
+ "db_name": "SQLite",
+ "query": "SELECT * FROM files WHERE filename = ?",
+ "describe": {
+ "columns": [
+ {
+ "name": "filename",
+ "ordinal": 0,
+ "type_info": "Text"
+ },
+ {
+ "name": "embedding_time",
+ "ordinal": 1,
+ "type_info": "Int64"
+ },
+ {
+ "name": "ocr_time",
+ "ordinal": 2,
+ "type_info": "Int64"
+ },
+ {
+ "name": "thumbnail_time",
+ "ordinal": 3,
+ "type_info": "Int64"
+ },
+ {
+ "name": "embedding",
+ "ordinal": 4,
+ "type_info": "Blob"
+ },
+ {
+ "name": "ocr",
+ "ordinal": 5,
+ "type_info": "Text"
+ },
+ {
+ "name": "raw_ocr_segments",
+ "ordinal": 6,
+ "type_info": "Blob"
+ },
+ {
+ "name": "thumbnails",
+ "ordinal": 7,
+ "type_info": "Blob"
+ }
+ ],
+ "parameters": {
+ "Right": 1
+ },
+ "nullable": [
+ false,
+ true,
+ true,
+ true,
+ true,
+ true,
+ true,
+ true
+ ]
+ },
+ "hash": "ec2da4ab11ede7a9a468ff3a50c55e0f6503fddd369f2c3031f39c0759bb97a0"
+}
diff --git a/.sqlx/query-ee6eca5b34c3fbf76cd10932db35c6d8631e48be9166c02b593020a17fcf2686.json b/.sqlx/query-ee6eca5b34c3fbf76cd10932db35c6d8631e48be9166c02b593020a17fcf2686.json
new file mode 100644
index 0000000..ecedbf9
--- /dev/null
+++ b/.sqlx/query-ee6eca5b34c3fbf76cd10932db35c6d8631e48be9166c02b593020a17fcf2686.json
@@ -0,0 +1,12 @@
+{
+ "db_name": "SQLite",
+ "query": "DELETE FROM files WHERE filename = ?",
+ "describe": {
+ "columns": [],
+ "parameters": {
+ "Right": 1
+ },
+ "nullable": []
+ },
+ "hash": "ee6eca5b34c3fbf76cd10932db35c6d8631e48be9166c02b593020a17fcf2686"
+}
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..543b422
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,3320 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "addr2line"
+version = "0.21.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
+dependencies = [
+ "gimli",
+]
+
+[[package]]
+name = "adler"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
+
+[[package]]
+name = "ahash"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
+dependencies = [
+ "cfg-if",
+ "getrandom",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "aligned-vec"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1"
+
+[[package]]
+name = "allocator-api2"
+version = "0.2.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
+
+[[package]]
+name = "android-tzdata"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
+
+[[package]]
+name = "arbitrary"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
+
+[[package]]
+name = "arg_enum_proc_macro"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "arrayvec"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
+
+[[package]]
+name = "async-trait"
+version = "0.1.80"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "atoi"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "atomic-waker"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+
+[[package]]
+name = "autocfg"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
+
+[[package]]
+name = "av1-grain"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6678909d8c5d46a42abcf571271e15fdbc0a225e3646cf23762cd415046c78bf"
+dependencies = [
+ "anyhow",
+ "arrayvec",
+ "log",
+ "nom",
+ "num-rational",
+ "v_frame",
+]
+
+[[package]]
+name = "avif-serialize"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "876c75a42f6364451a033496a14c44bffe41f5f4a8236f697391f11024e596d2"
+dependencies = [
+ "arrayvec",
+]
+
+[[package]]
+name = "axum"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
+dependencies = [
+ "async-trait",
+ "axum-core",
+ "bytes",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-util",
+ "itoa",
+ "matchit",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "rustversion",
+ "serde",
+ "serde_json",
+ "serde_path_to_error",
+ "serde_urlencoded",
+ "sync_wrapper 1.0.1",
+ "tokio",
+ "tower",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "axum-core"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "rustversion",
+ "sync_wrapper 0.1.2",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "backtrace"
+version = "0.3.71"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d"
+dependencies = [
+ "addr2line",
+ "cc",
+ "cfg-if",
+ "libc",
+ "miniz_oxide",
+ "object",
+ "rustc-demangle",
+]
+
+[[package]]
+name = "base64"
+version = "0.21.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
+
+[[package]]
+name = "base64"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+
+[[package]]
+name = "base64ct"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
+
+[[package]]
+name = "bit_field"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bitflags"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "bitstream-io"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c12d1856e42f0d817a835fe55853957c85c8c8a470114029143d3f12671446e"
+
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
+name = "built"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41bfbdb21256b87a8b5e80fab81a8eed158178e812fd7ba451907518b2742f16"
+
+[[package]]
+name = "bumpalo"
+version = "3.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
+
+[[package]]
+name = "bytemuck"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5"
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "byteorder-lite"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
+
+[[package]]
+name = "bytes"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
+
+[[package]]
+name = "cc"
+version = "1.0.98"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f"
+dependencies = [
+ "jobserver",
+ "libc",
+ "once_cell",
+]
+
+[[package]]
+name = "cfg-expr"
+version = "0.15.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02"
+dependencies = [
+ "smallvec",
+ "target-lexicon",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "chrono"
+version = "0.4.38"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401"
+dependencies = [
+ "android-tzdata",
+ "iana-time-zone",
+ "js-sys",
+ "num-traits",
+ "wasm-bindgen",
+ "windows-targets 0.52.5",
+]
+
+[[package]]
+name = "color_quant"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
+
+[[package]]
+name = "const-oid"
+version = "0.9.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
+
+[[package]]
+name = "core-foundation"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
+
+[[package]]
+name = "cpufeatures"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "crc"
+version = "3.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636"
+dependencies = [
+ "crc-catalog",
+]
+
+[[package]]
+name = "crc-catalog"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
+
+[[package]]
+name = "crc32fast"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-queue"
+version = "0.3.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
+
+[[package]]
+name = "crunchy"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
+
+[[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "der"
+version = "0.7.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0"
+dependencies = [
+ "const-oid",
+ "pem-rfc7468",
+ "zeroize",
+]
+
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "const-oid",
+ "crypto-common",
+ "subtle",
+]
+
+[[package]]
+name = "dotenvy"
+version = "0.15.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
+
+[[package]]
+name = "either"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "encoding_rs"
+version = "0.8.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "env_logger"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580"
+dependencies = [
+ "humantime",
+ "is-terminal",
+ "log",
+ "regex",
+ "termcolor",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
+[[package]]
+name = "errno"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
+dependencies = [
+ "libc",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "etcetera"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943"
+dependencies = [
+ "cfg-if",
+ "home",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "event-listener"
+version = "2.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
+
+[[package]]
+name = "exr"
+version = "1.72.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "887d93f60543e9a9362ef8a21beedd0a833c5d9610e18c67abe15a5963dcb1a4"
+dependencies = [
+ "bit_field",
+ "flume",
+ "half",
+ "lebe",
+ "miniz_oxide",
+ "rayon-core",
+ "smallvec",
+ "zune-inflate",
+]
+
+[[package]]
+name = "faiss"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3ffe048432786028b0a30aa1d13e10e08ced380439ba4a83fe5c227d2dd9733"
+dependencies = [
+ "faiss-sys",
+]
+
+[[package]]
+name = "faiss-sys"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b9c008fc56422bf34357f17226d9c5a5c2ef6245b4774759c5f67112e46915e"
+
+[[package]]
+name = "fastrand"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
+
+[[package]]
+name = "fdeflate"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f9bfee30e4dedf0ab8b422f03af778d9612b63f502710fc500a334ebe2de645"
+dependencies = [
+ "simd-adler32",
+]
+
+[[package]]
+name = "finl_unicode"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6"
+
+[[package]]
+name = "flate2"
+version = "1.0.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "flume"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+ "spin 0.9.8",
+]
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "foreign-types"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
+dependencies = [
+ "foreign-types-shared",
+]
+
+[[package]]
+name = "foreign-types-shared"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
+
+[[package]]
+name = "form_urlencoded"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
+dependencies = [
+ "percent-encoding",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
+
+[[package]]
+name = "futures-executor"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-intrusive"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f"
+dependencies = [
+ "futures-core",
+ "lock_api",
+ "parking_lot",
+]
+
+[[package]]
+name = "futures-io"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
+
+[[package]]
+name = "futures-macro"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "futures-sink"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5"
+
+[[package]]
+name = "futures-task"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"
+
+[[package]]
+name = "futures-util"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48"
+dependencies = [
+ "futures-core",
+ "futures-io",
+ "futures-macro",
+ "futures-sink",
+ "futures-task",
+ "memchr",
+ "pin-project-lite",
+ "pin-utils",
+ "slab",
+]
+
+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "gif"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fb2d69b19215e18bb912fa30f7ce15846e301408695e44e0ef719f1da9e19f2"
+dependencies = [
+ "color_quant",
+ "weezl",
+]
+
+[[package]]
+name = "gimli"
+version = "0.28.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
+
+[[package]]
+name = "h2"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "http",
+ "indexmap",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "half"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+dependencies = [
+ "ahash",
+ "allocator-api2",
+]
+
+[[package]]
+name = "hashlink"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
+dependencies = [
+ "hashbrown",
+]
+
+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+dependencies = [
+ "unicode-segmentation",
+]
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "hermit-abi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
+
+[[package]]
+name = "hex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+
+[[package]]
+name = "hkdf"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7"
+dependencies = [
+ "hmac",
+]
+
+[[package]]
+name = "hmac"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
+dependencies = [
+ "digest",
+]
+
+[[package]]
+name = "home"
+version = "0.5.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
+dependencies = [
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "http"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258"
+dependencies = [
+ "bytes",
+ "fnv",
+ "itoa",
+]
+
+[[package]]
+name = "http-body"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643"
+dependencies = [
+ "bytes",
+ "http",
+]
+
+[[package]]
+name = "http-body-util"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "http",
+ "http-body",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "httparse"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"
+
+[[package]]
+name = "httpdate"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
+
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
+[[package]]
+name = "hyper"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d"
+dependencies = [
+ "bytes",
+ "futures-channel",
+ "futures-util",
+ "h2",
+ "http",
+ "http-body",
+ "httparse",
+ "httpdate",
+ "itoa",
+ "pin-project-lite",
+ "smallvec",
+ "tokio",
+ "want",
+]
+
+[[package]]
+name = "hyper-tls"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
+dependencies = [
+ "bytes",
+ "http-body-util",
+ "hyper",
+ "hyper-util",
+ "native-tls",
+ "tokio",
+ "tokio-native-tls",
+ "tower-service",
+]
+
+[[package]]
+name = "hyper-util"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa"
+dependencies = [
+ "bytes",
+ "futures-channel",
+ "futures-util",
+ "http",
+ "http-body",
+ "hyper",
+ "pin-project-lite",
+ "socket2",
+ "tokio",
+ "tower",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "iana-time-zone"
+version = "0.1.60"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "iana-time-zone-haiku",
+ "js-sys",
+ "wasm-bindgen",
+ "windows-core",
+]
+
+[[package]]
+name = "iana-time-zone-haiku"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "idna"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
+dependencies = [
+ "unicode-bidi",
+ "unicode-normalization",
+]
+
+[[package]]
+name = "image"
+version = "0.25.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd54d660e773627692c524beaad361aca785a4f9f5730ce91f42aabe5bce3d11"
+dependencies = [
+ "bytemuck",
+ "byteorder",
+ "color_quant",
+ "exr",
+ "gif",
+ "image-webp",
+ "num-traits",
+ "png",
+ "qoi",
+ "ravif",
+ "rayon",
+ "rgb",
+ "tiff",
+ "zune-core",
+ "zune-jpeg",
+]
+
+[[package]]
+name = "image-webp"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d730b085583c4d789dfd07fdcf185be59501666a90c97c40162b37e4fdad272d"
+dependencies = [
+ "byteorder-lite",
+ "thiserror",
+]
+
+[[package]]
+name = "imgref"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44feda355f4159a7c757171a77de25daf6411e217b4cabd03bd6650690468126"
+
+[[package]]
+name = "indexmap"
+version = "2.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
+[[package]]
+name = "interpolate_name"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "ipnet"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
+
+[[package]]
+name = "is-terminal"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "itertools"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
+
+[[package]]
+name = "jobserver"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "jpeg-decoder"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0"
+
+[[package]]
+name = "js-sys"
+version = "0.3.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "json5"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1"
+dependencies = [
+ "pest",
+ "pest_derive",
+ "serde",
+]
+
+[[package]]
+name = "kernel32-sys"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
+dependencies = [
+ "winapi",
+ "winapi-build",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+dependencies = [
+ "spin 0.5.2",
+]
+
+[[package]]
+name = "lebe"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
+
+[[package]]
+name = "libc"
+version = "0.2.155"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
+
+[[package]]
+name = "libfuzzer-sys"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7"
+dependencies = [
+ "arbitrary",
+ "cc",
+ "once_cell",
+]
+
+[[package]]
+name = "libm"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
+
+[[package]]
+name = "libsqlite3-sys"
+version = "0.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf4e226dcd58b4be396f7bd3c20da8fdee2911400705297ba7d2d7cc2c30f716"
+dependencies = [
+ "cc",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
+
+[[package]]
+name = "lock_api"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
+dependencies = [
+ "autocfg",
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
+
+[[package]]
+name = "loop9"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062"
+dependencies = [
+ "imgref",
+]
+
+[[package]]
+name = "matchit"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
+
+[[package]]
+name = "matrixmultiply"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2"
+dependencies = [
+ "autocfg",
+ "rawpointer",
+]
+
+[[package]]
+name = "maybe-rayon"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
+dependencies = [
+ "cfg-if",
+ "rayon",
+]
+
+[[package]]
+name = "md-5"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
+dependencies = [
+ "cfg-if",
+ "digest",
+]
+
+[[package]]
+name = "memchr"
+version = "2.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
+
+[[package]]
+name = "meme-search-engine"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "axum",
+ "base64 0.22.1",
+ "chrono",
+ "faiss",
+ "fnv",
+ "futures-util",
+ "half",
+ "image",
+ "json5",
+ "log",
+ "ndarray",
+ "num_cpus",
+ "pretty_env_logger",
+ "regex",
+ "reqwest",
+ "rmp-serde",
+ "serde",
+ "serde_bytes",
+ "serde_json",
+ "sqlx",
+ "tokio",
+ "tokio-stream",
+ "tower",
+ "tower-http",
+ "walkdir",
+]
+
+[[package]]
+name = "mime"
+version = "0.3.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
+
+[[package]]
+name = "mime_guess"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef"
+dependencies = [
+ "mime",
+ "unicase",
+]
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "miniz_oxide"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae"
+dependencies = [
+ "adler",
+ "simd-adler32",
+]
+
+[[package]]
+name = "mio"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
+dependencies = [
+ "libc",
+ "wasi",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "native-tls"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
+dependencies = [
+ "lazy_static",
+ "libc",
+ "log",
+ "openssl",
+ "openssl-probe",
+ "openssl-sys",
+ "schannel",
+ "security-framework",
+ "security-framework-sys",
+ "tempfile",
+]
+
+[[package]]
+name = "ndarray"
+version = "0.15.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32"
+dependencies = [
+ "matrixmultiply",
+ "num-complex",
+ "num-integer",
+ "num-traits",
+ "rawpointer",
+]
+
+[[package]]
+name = "new_debug_unreachable"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "noop_proc_macro"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8"
+
+[[package]]
+name = "num-bigint"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-bigint-dig"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151"
+dependencies = [
+ "byteorder",
+ "lazy_static",
+ "libm",
+ "num-integer",
+ "num-iter",
+ "num-traits",
+ "rand",
+ "smallvec",
+ "zeroize",
+]
+
+[[package]]
+name = "num-complex"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-derive"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-iter"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
+dependencies = [
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+ "libm",
+]
+
+[[package]]
+name = "num_cpus"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
+[[package]]
+name = "object"
+version = "0.32.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
+
+[[package]]
+name = "openssl"
+version = "0.10.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f"
+dependencies = [
+ "bitflags 2.5.0",
+ "cfg-if",
+ "foreign-types",
+ "libc",
+ "once_cell",
+ "openssl-macros",
+ "openssl-sys",
+]
+
+[[package]]
+name = "openssl-macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "openssl-probe"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
+
+[[package]]
+name = "openssl-sys"
+version = "0.9.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "parking_lot"
+version = "0.12.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall 0.5.1",
+ "smallvec",
+ "windows-targets 0.52.5",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "pem-rfc7468"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412"
+dependencies = [
+ "base64ct",
+]
+
+[[package]]
+name = "percent-encoding"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+
+[[package]]
+name = "pest"
+version = "2.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "560131c633294438da9f7c4b08189194b20946c8274c6b9e38881a7874dc8ee8"
+dependencies = [
+ "memchr",
+ "thiserror",
+ "ucd-trie",
+]
+
+[[package]]
+name = "pest_derive"
+version = "2.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26293c9193fbca7b1a3bf9b79dc1e388e927e6cacaa78b4a3ab705a1d3d41459"
+dependencies = [
+ "pest",
+ "pest_generator",
+]
+
+[[package]]
+name = "pest_generator"
+version = "2.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ec22af7d3fb470a85dd2ca96b7c577a1eb4ef6f1683a9fe9a8c16e136c04687"
+dependencies = [
+ "pest",
+ "pest_meta",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "pest_meta"
+version = "2.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7a240022f37c361ec1878d646fc5b7d7c4d28d5946e1a80ad5a7a4f4ca0bdcd"
+dependencies = [
+ "once_cell",
+ "pest",
+ "sha2",
+]
+
+[[package]]
+name = "pin-project"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3"
+dependencies = [
+ "pin-project-internal",
+]
+
+[[package]]
+name = "pin-project-internal"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02"
+
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
+[[package]]
+name = "pkcs1"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f"
+dependencies = [
+ "der",
+ "pkcs8",
+ "spki",
+]
+
+[[package]]
+name = "pkcs8"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
+dependencies = [
+ "der",
+ "spki",
+]
+
+[[package]]
+name = "pkg-config"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
+
+[[package]]
+name = "png"
+version = "0.17.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06e4b0d3d1312775e782c86c91a111aa1f910cbb65e1337f9975b5f9a554b5e1"
+dependencies = [
+ "bitflags 1.3.2",
+ "crc32fast",
+ "fdeflate",
+ "flate2",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "pretty_env_logger"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c"
+dependencies = [
+ "env_logger",
+ "log",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "profiling"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d84d1d7a6ac92673717f9f6d1518374ef257669c24ebc5ac25d5033828be58"
+dependencies = [
+ "profiling-procmacros",
+]
+
+[[package]]
+name = "profiling-procmacros"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd"
+dependencies = [
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "qoi"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "quick-error"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
+
+[[package]]
+name = "quote"
+version = "1.0.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rav1e"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9"
+dependencies = [
+ "arbitrary",
+ "arg_enum_proc_macro",
+ "arrayvec",
+ "av1-grain",
+ "bitstream-io",
+ "built",
+ "cfg-if",
+ "interpolate_name",
+ "itertools",
+ "libc",
+ "libfuzzer-sys",
+ "log",
+ "maybe-rayon",
+ "new_debug_unreachable",
+ "noop_proc_macro",
+ "num-derive",
+ "num-traits",
+ "once_cell",
+ "paste",
+ "profiling",
+ "rand",
+ "rand_chacha",
+ "simd_helpers",
+ "system-deps",
+ "thiserror",
+ "v_frame",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "ravif"
+version = "0.11.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bc13288f5ab39e6d7c9d501759712e6969fcc9734220846fc9ed26cae2cc4234"
+dependencies = [
+ "avif-serialize",
+ "imgref",
+ "loop9",
+ "quick-error",
+ "rav1e",
+ "rayon",
+ "rgb",
+]
+
+[[package]]
+name = "rawpointer"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
+
+[[package]]
+name = "rayon"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
+dependencies = [
+ "bitflags 1.3.2",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e"
+dependencies = [
+ "bitflags 2.5.0",
+]
+
+[[package]]
+name = "regex"
+version = "1.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
+
+[[package]]
+name = "reqwest"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10"
+dependencies = [
+ "base64 0.22.1",
+ "bytes",
+ "encoding_rs",
+ "futures-core",
+ "futures-util",
+ "h2",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-tls",
+ "hyper-util",
+ "ipnet",
+ "js-sys",
+ "log",
+ "mime",
+ "mime_guess",
+ "native-tls",
+ "once_cell",
+ "percent-encoding",
+ "pin-project-lite",
+ "rustls-pemfile",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
+ "sync_wrapper 0.1.2",
+ "system-configuration",
+ "tokio",
+ "tokio-native-tls",
+ "tower-service",
+ "url",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+ "winreg",
+]
+
+[[package]]
+name = "rgb"
+version = "0.8.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "rmp"
+version = "0.8.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4"
+dependencies = [
+ "byteorder",
+ "num-traits",
+ "paste",
+]
+
+[[package]]
+name = "rmp-serde"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db"
+dependencies = [
+ "byteorder",
+ "rmp",
+ "serde",
+]
+
+[[package]]
+name = "rsa"
+version = "0.9.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc"
+dependencies = [
+ "const-oid",
+ "digest",
+ "num-bigint-dig",
+ "num-integer",
+ "num-traits",
+ "pkcs1",
+ "pkcs8",
+ "rand_core",
+ "signature",
+ "spki",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "rustc-demangle"
+version = "0.1.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
+
+[[package]]
+name = "rustix"
+version = "0.38.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
+dependencies = [
+ "bitflags 2.5.0",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "rustls-pemfile"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d"
+dependencies = [
+ "base64 0.22.1",
+ "rustls-pki-types",
+]
+
+[[package]]
+name = "rustls-pki-types"
+version = "1.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d"
+
+[[package]]
+name = "rustversion"
+version = "1.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
+
+[[package]]
+name = "ryu"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
+
+[[package]]
+name = "same-file"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d931a44fdaa43b8637009e7632a02adc4f2b2e0733c08caa4cf00e8da4a117a7"
+dependencies = [
+ "kernel32-sys",
+ "winapi",
+]
+
+[[package]]
+name = "schannel"
+version = "0.1.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534"
+dependencies = [
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "security-framework"
+version = "2.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0"
+dependencies = [
+ "bitflags 2.5.0",
+ "core-foundation",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework-sys"
+version = "2.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.202"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_bytes"
+version = "0.11.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b8497c313fd43ab992087548117643f6fcd935cbf36f176ffda0aacf9591734"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.202"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.117"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "serde_path_to_error"
+version = "0.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6"
+dependencies = [
+ "itoa",
+ "serde",
+]
+
+[[package]]
+name = "serde_spanned"
+version = "0.6.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "serde_urlencoded"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
+dependencies = [
+ "form_urlencoded",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "sha1"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
+[[package]]
+name = "sha2"
+version = "0.10.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
+[[package]]
+name = "signal-hook-registry"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "signature"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
+dependencies = [
+ "digest",
+ "rand_core",
+]
+
+[[package]]
+name = "simd-adler32"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
+
+[[package]]
+name = "simd_helpers"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6"
+dependencies = [
+ "quote",
+]
+
+[[package]]
+name = "slab"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "smallvec"
+version = "1.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
+
+[[package]]
+name = "socket2"
+version = "0.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c"
+dependencies = [
+ "libc",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "spin"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
+
+[[package]]
+name = "spin"
+version = "0.9.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
+dependencies = [
+ "lock_api",
+]
+
+[[package]]
+name = "spki"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
+dependencies = [
+ "base64ct",
+ "der",
+]
+
+[[package]]
+name = "sqlformat"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c"
+dependencies = [
+ "itertools",
+ "nom",
+ "unicode_categories",
+]
+
+[[package]]
+name = "sqlx"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c9a2ccff1a000a5a59cd33da541d9f2fdcd9e6e8229cc200565942bff36d0aaa"
+dependencies = [
+ "sqlx-core",
+ "sqlx-macros",
+ "sqlx-mysql",
+ "sqlx-postgres",
+ "sqlx-sqlite",
+]
+
+[[package]]
+name = "sqlx-core"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24ba59a9342a3d9bab6c56c118be528b27c9b60e490080e9711a04dccac83ef6"
+dependencies = [
+ "ahash",
+ "atoi",
+ "byteorder",
+ "bytes",
+ "crc",
+ "crossbeam-queue",
+ "either",
+ "event-listener",
+ "futures-channel",
+ "futures-core",
+ "futures-intrusive",
+ "futures-io",
+ "futures-util",
+ "hashlink",
+ "hex",
+ "indexmap",
+ "log",
+ "memchr",
+ "once_cell",
+ "paste",
+ "percent-encoding",
+ "serde",
+ "serde_json",
+ "sha2",
+ "smallvec",
+ "sqlformat",
+ "thiserror",
+ "tokio",
+ "tokio-stream",
+ "tracing",
+ "url",
+]
+
+[[package]]
+name = "sqlx-macros"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ea40e2345eb2faa9e1e5e326db8c34711317d2b5e08d0d5741619048a803127"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "sqlx-core",
+ "sqlx-macros-core",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "sqlx-macros-core"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8"
+dependencies = [
+ "dotenvy",
+ "either",
+ "heck 0.4.1",
+ "hex",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "serde",
+ "serde_json",
+ "sha2",
+ "sqlx-core",
+ "sqlx-mysql",
+ "sqlx-sqlite",
+ "syn 1.0.109",
+ "tempfile",
+ "tokio",
+ "url",
+]
+
+[[package]]
+name = "sqlx-mysql"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ed31390216d20e538e447a7a9b959e06ed9fc51c37b514b46eb758016ecd418"
+dependencies = [
+ "atoi",
+ "base64 0.21.7",
+ "bitflags 2.5.0",
+ "byteorder",
+ "bytes",
+ "crc",
+ "digest",
+ "dotenvy",
+ "either",
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-util",
+ "generic-array",
+ "hex",
+ "hkdf",
+ "hmac",
+ "itoa",
+ "log",
+ "md-5",
+ "memchr",
+ "once_cell",
+ "percent-encoding",
+ "rand",
+ "rsa",
+ "serde",
+ "sha1",
+ "sha2",
+ "smallvec",
+ "sqlx-core",
+ "stringprep",
+ "thiserror",
+ "tracing",
+ "whoami",
+]
+
+[[package]]
+name = "sqlx-postgres"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c824eb80b894f926f89a0b9da0c7f435d27cdd35b8c655b114e58223918577e"
+dependencies = [
+ "atoi",
+ "base64 0.21.7",
+ "bitflags 2.5.0",
+ "byteorder",
+ "crc",
+ "dotenvy",
+ "etcetera",
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-util",
+ "hex",
+ "hkdf",
+ "hmac",
+ "home",
+ "itoa",
+ "log",
+ "md-5",
+ "memchr",
+ "once_cell",
+ "rand",
+ "serde",
+ "serde_json",
+ "sha2",
+ "smallvec",
+ "sqlx-core",
+ "stringprep",
+ "thiserror",
+ "tracing",
+ "whoami",
+]
+
+[[package]]
+name = "sqlx-sqlite"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa"
+dependencies = [
+ "atoi",
+ "flume",
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-intrusive",
+ "futures-util",
+ "libsqlite3-sys",
+ "log",
+ "percent-encoding",
+ "serde",
+ "sqlx-core",
+ "tracing",
+ "url",
+ "urlencoding",
+]
+
+[[package]]
+name = "stringprep"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb41d74e231a107a1b4ee36bd1214b11285b77768d2e3824aedafa988fd36ee6"
+dependencies = [
+ "finl_unicode",
+ "unicode-bidi",
+ "unicode-normalization",
+]
+
+[[package]]
+name = "subtle"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.65"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "sync_wrapper"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
+
+[[package]]
+name = "sync_wrapper"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394"
+
+[[package]]
+name = "system-configuration"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
+dependencies = [
+ "bitflags 1.3.2",
+ "core-foundation",
+ "system-configuration-sys",
+]
+
+[[package]]
+name = "system-configuration-sys"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "system-deps"
+version = "6.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3e535eb8dded36d55ec13eddacd30dec501792ff23a0b1682c38601b8cf2349"
+dependencies = [
+ "cfg-expr",
+ "heck 0.5.0",
+ "pkg-config",
+ "toml",
+ "version-compare",
+]
+
+[[package]]
+name = "target-lexicon"
+version = "0.12.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f"
+
+[[package]]
+name = "tempfile"
+version = "3.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1"
+dependencies = [
+ "cfg-if",
+ "fastrand",
+ "rustix",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "termcolor"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.61"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.61"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "tiff"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e"
+dependencies = [
+ "flate2",
+ "jpeg-decoder",
+ "weezl",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
+name = "tokio"
+version = "1.37.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787"
+dependencies = [
+ "backtrace",
+ "bytes",
+ "libc",
+ "mio",
+ "num_cpus",
+ "parking_lot",
+ "pin-project-lite",
+ "signal-hook-registry",
+ "socket2",
+ "tokio-macros",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "tokio-native-tls"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
+dependencies = [
+ "native-tls",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-stream"
+version = "0.1.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af"
+dependencies = [
+ "futures-core",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-util"
+version = "0.7.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "futures-sink",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "toml"
+version = "0.8.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba"
+dependencies = [
+ "serde",
+ "serde_spanned",
+ "toml_datetime",
+ "toml_edit",
+]
+
+[[package]]
+name = "toml_datetime"
+version = "0.6.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "toml_edit"
+version = "0.22.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c"
+dependencies = [
+ "indexmap",
+ "serde",
+ "serde_spanned",
+ "toml_datetime",
+ "winnow",
+]
+
+[[package]]
+name = "tower"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "pin-project",
+ "pin-project-lite",
+ "tokio",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5"
+dependencies = [
+ "bitflags 2.5.0",
+ "bytes",
+ "http",
+ "http-body",
+ "http-body-util",
+ "pin-project-lite",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "tower-layer"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0"
+
+[[package]]
+name = "tower-service"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
+
+[[package]]
+name = "tracing"
+version = "0.1.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
+dependencies = [
+ "log",
+ "pin-project-lite",
+ "tracing-attributes",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-attributes"
+version = "0.1.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
+dependencies = [
+ "once_cell",
+]
+
+[[package]]
+name = "try-lock"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
+
+[[package]]
+name = "typenum"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
+
+[[package]]
+name = "ucd-trie"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
+
+[[package]]
+name = "unicase"
+version = "2.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89"
+dependencies = [
+ "version_check",
+]
+
+[[package]]
+name = "unicode-bidi"
+version = "0.3.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+
+[[package]]
+name = "unicode-normalization"
+version = "0.1.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
+name = "unicode-segmentation"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
+
+[[package]]
+name = "unicode_categories"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
+
+[[package]]
+name = "url"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+]
+
+[[package]]
+name = "urlencoding"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
+
+[[package]]
+name = "v_frame"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6f32aaa24bacd11e488aa9ba66369c7cd514885742c9fe08cfe85884db3e92b"
+dependencies = [
+ "aligned-vec",
+ "num-traits",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
+[[package]]
+name = "version-compare"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b"
+
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "walkdir"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb08f9e670fab86099470b97cd2b252d6527f0b3cc1401acdb595ffc9dd288ff"
+dependencies = [
+ "kernel32-sys",
+ "same-file",
+ "winapi",
+]
+
+[[package]]
+name = "want"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
+dependencies = [
+ "try-lock",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
+[[package]]
+name = "wasite"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-futures"
+version = "0.4.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
+
+[[package]]
+name = "web-sys"
+version = "0.3.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "weezl"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082"
+
+[[package]]
+name = "whoami"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9"
+dependencies = [
+ "redox_syscall 0.4.1",
+ "wasite",
+]
+
+[[package]]
+name = "winapi"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
+
+[[package]]
+name = "winapi-build"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
+dependencies = [
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
+dependencies = [
+ "windows-targets 0.52.5",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
+dependencies = [
+ "windows-targets 0.48.5",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets 0.52.5",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
+dependencies = [
+ "windows_aarch64_gnullvm 0.48.5",
+ "windows_aarch64_msvc 0.48.5",
+ "windows_i686_gnu 0.48.5",
+ "windows_i686_msvc 0.48.5",
+ "windows_x86_64_gnu 0.48.5",
+ "windows_x86_64_gnullvm 0.48.5",
+ "windows_x86_64_msvc 0.48.5",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.5",
+ "windows_aarch64_msvc 0.52.5",
+ "windows_i686_gnu 0.52.5",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc 0.52.5",
+ "windows_x86_64_gnu 0.52.5",
+ "windows_x86_64_gnullvm 0.52.5",
+ "windows_x86_64_msvc 0.52.5",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
+
+[[package]]
+name = "winnow"
+version = "0.6.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3c52e9c97a68071b23e836c9380edae937f17b9c4667bd021973efc689f618d"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "winreg"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5"
+dependencies = [
+ "cfg-if",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.7.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.7.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.65",
+]
+
+[[package]]
+name = "zeroize"
+version = "1.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
+
+[[package]]
+name = "zune-core"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a"
+
+[[package]]
+name = "zune-inflate"
+version = "0.2.54"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02"
+dependencies = [
+ "simd-adler32",
+]
+
+[[package]]
+name = "zune-jpeg"
+version = "0.4.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec866b44a2a1fd6133d363f073ca1b179f438f99e7e5bfb1e33f7181facfe448"
+dependencies = [
+ "zune-core",
+]
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..f84055e
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,34 @@
+[package]
+name = "meme-search-engine"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+tokio = { version = "1", features = ["full"] }
+axum = "0.7"
+image = { version = "0.25", features = ["avif"] }
+reqwest = { version = "0.12", features = ["multipart"] }
+serde = { version = "1", features = ["derive"] }
+sqlx = { version = "0.7", features = ["runtime-tokio", "sqlite"] }
+walkdir = "1"
+log = "0.4"
+rmp-serde = "1"
+serde_json = "1"
+chrono = "0.4"
+base64 = "0.22"
+anyhow = "1"
+fnv = "1"
+faiss = "0.12"
+ndarray = "0.15"
+half = { version = "2" }
+regex = "1"
+pretty_env_logger = "0.5"
+futures-util = "0.3"
+tokio-stream = "0.1"
+num_cpus = "1"
+serde_bytes = "0.11"
+tower-http = { version = "0.5", features = ["cors"] }
+tower = "0.4"
+json5 = "0.4"
\ No newline at end of file
diff --git a/clipfront2/src/App.svelte b/clipfront2/src/App.svelte
index f0e637a..fd7d042 100644
--- a/clipfront2/src/App.svelte
+++ b/clipfront2/src/App.svelte
@@ -62,6 +62,8 @@
.result
border: 1px solid gray
+ *
+ display: block
.result img
width: 100%
@@ -109,17 +111,22 @@
{/if}
{#if results}
+ {#if displayedResults.length === 0}
+ No results. Wait for index rebuild.
+ {/if}
{#each displayedResults as result}
{#key result.file}
@@ -171,9 +178,7 @@
let displayedResults = []
const runSearch = async () => {
if (!resultPromise) {
- let args = {}
- args.text = queryTerms.filter(x => x.type === "text" && x.text).map(({ text, weight, sign }) => [ text, weight * { "+": 1, "-": -1 }[sign] ])
- args.images = queryTerms.filter(x => x.type === "image").map(({ imageData, weight, sign }) => [ imageData, weight * { "+": 1, "-": -1 }[sign] ])
+ let args = {"terms": queryTerms.map(x => ({ image: x.imageData, text: x.text, weight: x.weight * { "+": 1, "-": -1 }[x.sign] }))}
resultPromise = util.doQuery(args).then(res => {
error = null
results = res
@@ -181,7 +186,8 @@
displayedResults = []
pendingImageLoads = 0
for (let i = 0; i < chunkSize; i++) {
- displayedResults.push(results[i])
+ if (i >= results.matches.length) break
+ displayedResults.push(results.matches[i])
pendingImageLoads += 1
}
redrawGrid()
@@ -195,7 +201,8 @@
if (window.scrollY + window.innerHeight < heightThreshold) return;
let init = displayedResults.length
for (let i = 0; i < chunkSize; i++) {
- displayedResults.push(results[init + i])
+ if (init + i >= results.matches.length) break
+ displayedResults.push(results.matches[init + i])
pendingImageLoads += 1
}
displayedResults = displayedResults
diff --git a/clipfront2/src/build.js b/clipfront2/src/build.js
index 6037466..2ade5ba 100644
--- a/clipfront2/src/build.js
+++ b/clipfront2/src/build.js
@@ -7,7 +7,7 @@ esbuild
.build({
entryPoints: [path.join(__dirname, "app.js")],
bundle: true,
- minify: true,
+ minify: false,
outfile: path.join(__dirname, "../static/app.js"),
plugins: [sveltePlugin({
preprocess: {
diff --git a/clipfront2/src/util.js b/clipfront2/src/util.js
index 502916c..50fc4c7 100644
--- a/clipfront2/src/util.js
+++ b/clipfront2/src/util.js
@@ -1,7 +1,8 @@
import * as config from "../../frontend_config.json"
+import * as backendConfig from "../../mse_config.json"
import * as formats from "../../formats.json"
-export const getURL = x => config.image_path + x
+export const getURL = x => config.image_path + x[1]
export const doQuery = args => fetch(config.backend_url, {
method: "POST",
@@ -11,15 +12,11 @@ export const doQuery = args => fetch(config.backend_url, {
body: JSON.stringify(args)
}).then(x => x.json())
-const filesafeCharset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-"
-export const thumbnailPath = (originalPath, format) => {
- const extension = formats.formats[format][0]
- // Python and JS have minor differences in string handling wrt. astral characters which could result in incorrect quantities of dashes. Fortunately, Array.from handles this correctly.
- return config.thumb_path + `${Array.from(originalPath).map(x => filesafeCharset.includes(x) ? x : "_").join("")}.${format}${extension}`
+export const hasFormat = (results, result, format) => {
+ return (result[3] & (1 << results.formats.indexOf(format))) !== 0 // bit i of result[3] <=> results.formats[i] is available (assumes result[3] is a format bitmask; confirm against backend)
}
-const thumbedExtensions = formats.extensions
-export const hasThumbnails = t => {
- const parts = t.split(".")
- return thumbedExtensions.includes("." + parts[parts.length - 1])
+export const thumbnailURL = (results, result, format) => {
+ // URL layout: config.thumb_path + result[2] + format name + "." + results.extensions[format]
+ return `${config.thumb_path}${result[2]}${format}.${results.extensions[format]}`
}
\ No newline at end of file
diff --git a/frontend_config.json b/frontend_config.json
index 2eb8b12..3406307 100644
--- a/frontend_config.json
+++ b/frontend_config.json
@@ -1,5 +1,5 @@
{
- "backend_url": "https://mse.osmarks.net/backend",
- "image_path": "https://i2.osmarks.net/memes-or-something/",
- "thumb_path": "https://i2.osmarks.net/thumbs/memes-or-something_"
+ "backend_url": "http://localhost:1707/",
+ "image_path": "http://localhost:7858/",
+ "thumb_path": "http://localhost:7857/"
}
\ No newline at end of file
diff --git a/misc/bad-go-version/go.mod b/misc/bad-go-version/go.mod
new file mode 100644
index 0000000..5dd9b45
--- /dev/null
+++ b/misc/bad-go-version/go.mod
@@ -0,0 +1,26 @@
+module meme-search
+
+go 1.22.2
+
+require (
+ github.com/DataIntelligenceCrew/go-faiss v0.2.0
+ github.com/jmoiron/sqlx v1.4.0
+ github.com/mattn/go-sqlite3 v1.14.22
+ github.com/samber/lo v1.39.0
+ github.com/titanous/json5 v1.0.0
+ github.com/vmihailenco/msgpack v4.0.4+incompatible
+ github.com/x448/float16 v0.8.4
+ golang.org/x/sync v0.7.0
+)
+
+require (
+ github.com/davidbyttow/govips/v2 v2.14.0 // indirect
+ github.com/golang/protobuf v1.5.2 // indirect
+ github.com/h2non/bimg v1.1.9 // indirect
+ golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect
+ golang.org/x/image v0.16.0 // indirect
+ golang.org/x/net v0.25.0 // indirect
+ golang.org/x/text v0.15.0 // indirect
+ google.golang.org/appengine v1.6.8 // indirect
+ google.golang.org/protobuf v1.26.0 // indirect
+)
diff --git a/misc/bad-go-version/go.sum b/misc/bad-go-version/go.sum
new file mode 100644
index 0000000..02a29a2
--- /dev/null
+++ b/misc/bad-go-version/go.sum
@@ -0,0 +1,100 @@
+filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
+github.com/DataIntelligenceCrew/go-faiss v0.2.0 h1:c0pxAr0vldXIuE4DZnqpl6FuuH1uZd45d+NiQHKg1uU=
+github.com/DataIntelligenceCrew/go-faiss v0.2.0/go.mod h1:4Gi7G3PF78IwZigTL2M1AJXOaAgxyL66vCqUYVaNgwk=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davidbyttow/govips/v2 v2.14.0 h1:il3pX0XMZ5nlwipkFJHRZ3vGzcdXWApARalJxNpRHJU=
+github.com/davidbyttow/govips/v2 v2.14.0/go.mod h1:eglyvgm65eImDiJJk4wpj9LSz4pWivPzWgDqkxWJn5k=
+github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
+github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/h2non/bimg v1.1.9 h1:WH20Nxko9l/HFm4kZCA3Phbgu2cbHvYzxwxn9YROEGg=
+github.com/h2non/bimg v1.1.9/go.mod h1:R3+UiYwkK4rQl6KVFTOFJHitgLbZXBZNFh2cv3AEbp8=
+github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
+github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
+github.com/json5/json5-go v0.0.0-20160331055859-40c2958e3bf8 h1:BQuwfXQRDQMI8YNqINKNlFV23P0h07ZvOQAtezAEsP8=
+github.com/json5/json5-go v0.0.0-20160331055859-40c2958e3bf8/go.mod h1:7n1PdYNh4RIHTvILru80IEstTADqQz/wmjeNXTcC9rA=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
+github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA=
+github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw=
+github.com/thoas/go-funk v0.9.3/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q=
+github.com/titanous/json5 v1.0.0 h1:hJf8Su1d9NuI/ffpxgxQfxh/UiBFZX7bMPid0rIL/7s=
+github.com/titanous/json5 v1.0.0/go.mod h1:7JH1M8/LHKc6cyP5o5g3CSaRj+mBrIimTxzpvmckH8c=
+github.com/vmihailenco/msgpack v4.0.4+incompatible h1:dSLoQfGFAo3F6OoNhwUmLwVgaUXK79GlxNBwueZn0xI=
+github.com/vmihailenco/msgpack v4.0.4+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
+golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM=
+golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE=
+golang.org/x/image v0.10.0/go.mod h1:jtrku+n79PfroUbvDdeUWMAI+heR786BofxrbiSF+J0=
+golang.org/x/image v0.16.0 h1:9kloLAKhUufZhA12l5fwnx2NZW39/we1UhBesW433jw=
+golang.org/x/image v0.16.0/go.mod h1:ugSZItdV4nOxyqp56HmXwH0Ry0nBCpjnZdpDaIHdoPs=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
+golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
+google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/misc/bad-go-version/meme_search.go b/misc/bad-go-version/meme_search.go
new file mode 100644
index 0000000..81fc2b7
--- /dev/null
+++ b/misc/bad-go-version/meme_search.go
@@ -0,0 +1,877 @@
+package main
+
+import (
+ "bytes"
+ "encoding/base64"
+ "encoding/json"
+ "fmt"
+ "hash/fnv"
+ "io"
+ "log"
+ "net/http"
+ "os"
+ "path/filepath"
+ "runtime"
+ "runtime/pprof"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/DataIntelligenceCrew/go-faiss"
+ "github.com/h2non/bimg"
+ "github.com/jmoiron/sqlx"
+ _ "github.com/mattn/go-sqlite3"
+ "github.com/samber/lo"
+ "github.com/vmihailenco/msgpack"
+ "github.com/x448/float16"
+ "golang.org/x/sync/errgroup"
+)
+
+type Config struct {
+	ClipServer       string `json:"clip_server"`   // base URL of the inference server
+	DbPath           string `json:"db_path"`       // path handed to the sqlite3 driver
+	Port             uint16 `json:"port"`          // unsigned: TCP ports go up to 65535, which int16 cannot hold
+	Files            string `json:"files"`         // directory that is walked for images
+	EnableOCR        bool   `json:"enable_ocr"`    // run the OCR pipeline during ingest
+	ThumbsPath       string `json:"thumbs_path"`   // directory thumbnails are written to
+	EnableThumbnails bool   `json:"enable_thumbs"` // run the thumbnail pipeline during ingest
+}
+
+type Index struct {
+	vectors     *faiss.IndexImpl // FAISS index over the embeddings; vector i belongs to filenames[i]
+	filenames   []string         // filename of every indexed vector, in insertion order
+	formatCodes []int64          // per file: bit j is set when a thumbnail in formatNames[j] exists
+	formatNames []string         // thumbnail format names, in the order their bits were assigned
+}
+
+var schema = `
+CREATE TABLE IF NOT EXISTS files (
+    filename TEXT PRIMARY KEY,
+    embedding_time INTEGER,
+    ocr_time INTEGER,
+    thumbnail_time INTEGER,
+    embedding BLOB,
+    ocr TEXT,
+    raw_ocr_segments BLOB,
+    thumbnails BLOB
+);
+
+CREATE VIRTUAL TABLE IF NOT EXISTS ocr_fts USING fts5 (
+    filename,
+    ocr,
+    tokenize='unicode61 remove_diacritics 2',
+    content='files'
+);
+
+CREATE TRIGGER IF NOT EXISTS ocr_fts_ins AFTER INSERT ON files BEGIN
+    INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
+END;
+
+CREATE TRIGGER IF NOT EXISTS ocr_fts_del AFTER DELETE ON files BEGIN
+    INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
+END;
+
+CREATE TRIGGER IF NOT EXISTS ocr_fts_upd AFTER UPDATE ON files BEGIN
+    INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
+    INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
+END;
+`
+
+type FileRecord struct {
+	Filename       string `db:"filename"`         // path with the config.Files prefix trimmed off
+	EmbedTime      int64  `db:"embedding_time"`   // the *_time columns hold timestamp() values (microseconds since the Unix epoch)
+	OcrTime        int64  `db:"ocr_time"`         // compared against the file's modification time to detect stale data
+	ThumbnailTime  int64  `db:"thumbnail_time"`   // same, for the thumbnails
+	Embedding      []byte `db:"embedding"`        // little-endian float16 values, as decoded by buildIndex
+	Ocr            string `db:"ocr"`              // recognised text, one segment per line
+	RawOcrSegments []byte `db:"raw_ocr_segments"` // msgpack-encoded scan result
+	Thumbnails     []byte `db:"thumbnails"`       // msgpack-encoded list of the thumbnail format names that were generated
+}
+
+type InferenceServerConfig struct {
+ BatchSize uint `msgpack:"batch"`
+ ImageSize []uint `msgpack:"image_size"`
+ EmbeddingSize uint `msgpack:"embedding_size"`
+}
+
+func decodeMsgpackFrom[O interface{}](resp *http.Response) (O, error) {
+ var result O
+ respData, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return result, err
+ }
+ err = msgpack.Unmarshal(respData, &result)
+ return result, err
+}
+
+func queryClipServer[I interface{}, O interface{}](config Config, path string, data I) (O, error) {
+ var result O
+ b, err := msgpack.Marshal(data)
+ if err != nil {
+ return result, err
+ }
+ resp, err := http.Post(config.ClipServer+path, "application/msgpack", bytes.NewReader(b))
+ if err != nil {
+ return result, err
+ }
+ defer resp.Body.Close()
+ return decodeMsgpackFrom[O](resp)
+}
+
+type LoadedImage struct {
+ image *bimg.Image
+ filename string
+ originalSize int
+}
+
+type EmbeddingInput struct {
+ image []byte
+ filename string
+}
+
+type EmbeddingRequest struct {
+ Images [][]byte `msgpack:"images"`
+ Text []string `msgpack:"text"`
+}
+
+type EmbeddingResponse = [][]byte
+
+func timestamp() int64 {
+ return time.Now().UnixMicro()
+}
+
+type ImageFormatConfig struct {
+ targetWidth int
+ targetFilesize int
+ quality int
+ format bimg.ImageType
+ extension string
+}
+
+func generateFilenameHash(filename string) string {
+ hasher := fnv.New128()
+ hasher.Write([]byte(filename))
+ hash := hasher.Sum(make([]byte, 0))
+ return base64.RawURLEncoding.EncodeToString(hash)
+}
+
+func generateThumbnailFilename(filename string, formatName string, formatConfig ImageFormatConfig) string {
+ return fmt.Sprintf("%s%s.%s", generateFilenameHash(filename), formatName, formatConfig.extension)
+}
+
+func initializeDatabase(config Config) (*sqlx.DB, error) {
+ db, err := sqlx.Connect("sqlite3", config.DbPath)
+ if err != nil {
+ return nil, err
+ }
+ _, err = db.Exec("PRAGMA busy_timeout = 2000; PRAGMA journal_mode = WAL")
+ if err != nil {
+ return nil, err
+ }
+ return db, nil
+}
+
+func imageFormats(config Config) map[string]ImageFormatConfig {
+ return map[string]ImageFormatConfig{
+ "jpegl": {
+ targetWidth: 800,
+ quality: 70,
+ format: bimg.JPEG,
+ extension: "jpg",
+ },
+ "jpegh": {
+ targetWidth: 1600,
+ quality: 80,
+ format: bimg.JPEG,
+ extension: "jpg",
+ },
+ "jpeg256kb": {
+ targetWidth: 500,
+ targetFilesize: 256000,
+ format: bimg.JPEG,
+ extension: "jpg",
+ },
+ "avifh": {
+ targetWidth: 1600,
+ quality: 80,
+ format: bimg.AVIF,
+ extension: "avif",
+ },
+ "avifl": {
+ targetWidth: 800,
+ quality: 30,
+ format: bimg.AVIF,
+ extension: "avif",
+ },
+ }
+}
+
+// consumeChannel calls handle for every item received from ch until ch is closed. After the first
+// failure it keeps receiving and discarding items, so that goroutines sending on ch can never block
+// forever behind a dead consumer; that first error is what gets returned.
+func consumeChannel[T any](ch <-chan T, handle func(T) error) error {
+	var firstErr error
+	for item := range ch {
+		if firstErr == nil {
+			firstErr = handle(item)
+		}
+	}
+	return firstErr
+}
+
+// ingestFiles brings the database in line with the directory config.Files. Every file that has no
+// record, or whose record is older than the file, is loaded and passed to the pipelines that still
+// need it: embedding through the inference server, thumbnail generation and OCR (the latter two
+// only when enabled in config). Records of files that are no longer on disk are deleted at the end.
+// The first error reported by the directory walk, the loaders or the workers is returned.
+func ingestFiles(config Config, backend InferenceServerConfig) error {
+	var wg errgroup.Group
+	var iwg errgroup.Group
+
+	// We assume everything is either a modern browser (low-DPI or high-DPI), an ancient browser or a ComputerCraft machine abusing Extra Utilities 2 screens.
+	var formats = imageFormats(config)
+
+	db, err := initializeDatabase(config)
+	if err != nil {
+		return err
+	}
+	defer db.Close()
+
+	toProcess := make(chan FileRecord, 100)
+	toEmbed := make(chan EmbeddingInput, backend.BatchSize)
+	toThumbnail := make(chan LoadedImage, 30)
+	toOCR := make(chan LoadedImage, 30)
+	embedBatches := make(chan []EmbeddingInput, 1)
+
+	// image loading and preliminary resizing
+	for range runtime.NumCPU() {
+		iwg.Go(func() error {
+			for record := range toProcess {
+				path := filepath.Join(config.Files, record.Filename)
+				buffer, err := bimg.Read(path)
+				if err != nil {
+					log.Println("could not read ", record.Filename, err)
+					continue // there is no image data to hand to the pipelines
+				}
+				if record.Embedding == nil {
+					// bimg.Resize leaves buffer untouched; Image.Process would replace the image's data with its own output
+					resized, err := bimg.Resize(buffer, bimg.Options{
+						Width:          int(backend.ImageSize[0]),
+						Height:         int(backend.ImageSize[1]),
+						Force:          true,
+						Type:           bimg.PNG,
+						Interpretation: bimg.InterpretationSRGB,
+					})
+					if err != nil {
+						log.Println("resize failure", record.Filename, err)
+					} else {
+						toEmbed <- EmbeddingInput{
+							image:    resized,
+							filename: record.Filename,
+						}
+					}
+				}
+				if record.Thumbnails == nil && config.EnableThumbnails {
+					toThumbnail <- LoadedImage{
+						image:        bimg.NewImage(buffer),
+						filename:     record.Filename,
+						originalSize: len(buffer),
+					}
+				}
+				if record.RawOcrSegments == nil && config.EnableOCR {
+					// OCR gets its own wrapper so that nothing it does to the image can affect thumbnailing
+					toOCR <- LoadedImage{
+						image:    bimg.NewImage(buffer),
+						filename: record.Filename,
+					}
+				}
+			}
+			return nil
+		})
+	}
+
+	if config.EnableThumbnails {
+		for range runtime.NumCPU() {
+			wg.Go(func() error {
+				return consumeChannel(toThumbnail, func(image LoadedImage) error {
+					generatedFormats := make([]string, 0)
+					for formatName, formatConfig := range formats {
+						// Always encode from the original bytes (Image.Process would overwrite them with its output).
+						render := func(quality int) ([]byte, error) {
+							return bimg.Resize(image.image.Image(), bimg.Options{
+								Width:         formatConfig.targetWidth,
+								Type:          formatConfig.format,
+								Speed:         4,
+								Quality:       quality,
+								StripMetadata: true,
+								Enlarge:       false,
+							})
+						}
+						var err error
+						var resized []byte
+						if formatConfig.targetFilesize == 0 {
+							resized, err = render(formatConfig.quality)
+						} else {
+							// Binary search for the highest quality whose output fits in targetFilesize bytes.
+							for lb, ub := 1, 100; lb < ub && err == nil; {
+								quality := (lb + ub) / 2
+								var candidate []byte
+								candidate, err = render(quality)
+								if err == nil && len(candidate) <= formatConfig.targetFilesize {
+									resized, lb = candidate, quality+1
+								} else {
+									ub = quality
+								}
+							}
+						}
+						if err != nil {
+							log.Println("thumbnailing failure", image.filename, err)
+							continue
+						}
+						// Keep a thumbnail only if one was produced and it is actually smaller than the original file.
+						if len(resized) > 0 && len(resized) < image.originalSize {
+							if err = bimg.Write(filepath.Join(config.ThumbsPath, generateThumbnailFilename(image.filename, formatName, formatConfig)), resized); err != nil {
+								return err
+							}
+							generatedFormats = append(generatedFormats, formatName)
+						}
+					}
+					formatsData, err := msgpack.Marshal(generatedFormats)
+					if err != nil {
+						return err
+					}
+					_, err = db.Exec("UPDATE files SET thumbnails = ?, thumbnail_time = ? WHERE filename = ?", formatsData, timestamp(), image.filename)
+					return err
+				})
+			})
+		}
+	}
+
+	if config.EnableOCR {
+		for range 100 {
+			wg.Go(func() error {
+				return consumeChannel(toOCR, func(image LoadedImage) error {
+					scan, err := scanImage(image.image)
+					if err != nil {
+						log.Println("OCR failure", image.filename, err)
+						return nil // best effort: a failed scan only skips this image
+					}
+					ocrText := ""
+					for _, segment := range scan {
+						ocrText += segment.text
+						ocrText += "\n"
+					}
+					// NOTE(review): Segment and SegmentCoords have only unexported fields, which msgpack presumably skips; confirm the stored segments are not empty
+					ocrData, err := msgpack.Marshal(scan)
+					if err != nil {
+						return err
+					}
+					_, err = db.Exec("UPDATE files SET ocr = ?, raw_ocr_segments = ?, ocr_time = ? WHERE filename = ?", ocrText, ocrData, timestamp(), image.filename)
+					return err
+				})
+			})
+		}
+	}
+
+	// Group the resized images into batches of backend.BatchSize; the last batch may be smaller.
+	wg.Go(func() error {
+		buffer := make([]EmbeddingInput, 0, backend.BatchSize)
+		for input := range toEmbed {
+			buffer = append(buffer, input)
+			if len(buffer) == int(backend.BatchSize) {
+				embedBatches <- buffer
+				buffer = make([]EmbeddingInput, 0, backend.BatchSize)
+			}
+		}
+		if len(buffer) > 0 {
+			embedBatches <- buffer
+		}
+		close(embedBatches)
+		return nil
+	})
+
+	// Three workers embed the batches; the vectors of one batch are stored in a single transaction.
+	for range 3 {
+		wg.Go(func() error {
+			return consumeChannel(embedBatches, func(batch []EmbeddingInput) error {
+				result, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "", EmbeddingRequest{
+					Images: lo.Map(batch, func(item EmbeddingInput, _ int) []byte { return item.image }),
+				})
+				if err != nil {
+					return err
+				}
+				if len(result) != len(batch) {
+					return fmt.Errorf("inference server returned %d embeddings for %d images", len(result), len(batch))
+				}
+
+				tx, err := db.Begin()
+				if err != nil {
+					return err
+				}
+				defer tx.Rollback() // does nothing once Commit has succeeded
+				for i, vector := range result {
+					if _, err = tx.Exec("UPDATE files SET embedding_time = ?, embedding = ? WHERE filename = ?", timestamp(), vector, batch[i].filename); err != nil {
+						return err
+					}
+				}
+				return tx.Commit()
+			})
+		})
+	}
+
+	filenamesOnDisk := make(map[string]struct{})
+
+	// Note every file on disk and queue those whose record is missing or older than the file itself.
+	err = filepath.WalkDir(config.Files, func(path string, d os.DirEntry, err error) error {
+		filename := strings.TrimPrefix(path, config.Files)
+		if err != nil {
+			return err
+		}
+		if d.IsDir() {
+			return nil
+		}
+		filenamesOnDisk[filename] = struct{}{}
+		records := []FileRecord{}
+		err = db.Select(&records, "SELECT * FROM files WHERE filename = ?", filename)
+		if err != nil {
+			return err
+		}
+		stat, err := d.Info()
+		if err != nil {
+			return err
+		}
+		modtime := stat.ModTime().UnixMicro()
+		if len(records) == 0 || modtime > records[0].EmbedTime || modtime > records[0].OcrTime || modtime > records[0].ThumbnailTime {
+			_, err = db.Exec("INSERT OR IGNORE INTO files VALUES (?, 0, 0, 0, '', '', '', '')", filename)
+			if err != nil {
+				return err
+			}
+			record := FileRecord{
+				Filename: filename,
+			}
+			if len(records) > 0 {
+				record = records[0]
+			}
+			if modtime > record.EmbedTime || len(record.Embedding) == 0 {
+				record.Embedding = nil
+			}
+			if modtime > record.OcrTime || len(record.RawOcrSegments) == 0 {
+				record.RawOcrSegments = nil
+			}
+			if modtime > record.ThumbnailTime || len(record.Thumbnails) == 0 {
+				record.Thumbnails = nil
+			}
+			toProcess <- record
+		}
+		return nil
+	})
+	// Shut the pipeline down stage by stage even when the walk failed, so that no worker stays blocked on its channel.
+	close(toProcess)
+	loadErr := iwg.Wait()
+	close(toEmbed)
+	close(toThumbnail)
+	close(toOCR)
+	workErr := wg.Wait()
+	for _, stageErr := range []error{err, loadErr, workErr} {
+		if stageErr != nil {
+			return stageErr
+		}
+	}
+
+	// Delete the records of files that are no longer on disk.
+	var known []string
+	if err = db.Select(&known, "SELECT filename FROM files"); err != nil {
+		return err
+	}
+	tx, err := db.Begin()
+	if err != nil {
+		return err
+	}
+	defer tx.Rollback() // does nothing once Commit has succeeded
+	for _, filename := range known {
+		if _, ok := filenamesOnDisk[filename]; !ok {
+			if _, err = tx.Exec("DELETE FROM files WHERE filename = ?", filename); err != nil {
+				return err
+			}
+		}
+	}
+	return tx.Commit()
+}
+
+const INDEX_ADD_BATCH = 512
+
+// buildIndex loads every stored embedding of the model's size into a new FAISS index and records, per indexed
+// file, its filename and a bitmask of its thumbnail formats (bit i stands for index.formatNames[i]).
+func buildIndex(config Config, backend InferenceServerConfig) (Index, error) {
+	var index Index
+
+	db, err := initializeDatabase(config)
+	if err != nil {
+		return index, err
+	}
+	defer db.Close()
+
+	index.vectors, err = faiss.IndexFactory(int(backend.EmbeddingSize), "SQfp16", faiss.MetricInnerProduct)
+	if err != nil {
+		return index, err
+	}
+
+	var count int
+	if err = db.Get(&count, "SELECT COUNT(*) FROM files"); err != nil {
+		return index, err
+	}
+
+	index.filenames = make([]string, 0, count)
+	index.formatCodes = make([]int64, 0, count)
+	buffer := make([]float32, 0, INDEX_ADD_BATCH*backend.EmbeddingSize)
+	index.formatNames = make([]string, 0, 5)
+
+	record := FileRecord{}
+	rows, err := db.Queryx("SELECT * FROM files")
+	if err != nil {
+		return index, err
+	}
+	defer rows.Close()
+	for rows.Next() {
+		if err := rows.StructScan(&record); err != nil {
+			return index, err
+		}
+		// Embeddings are stored as 2-byte floats; one of any other size would shift every later vector against filenames.
+		if len(record.Embedding) == 0 || len(record.Embedding) != 2*int(backend.EmbeddingSize) {
+			if len(record.Embedding) > 0 {
+				log.Println("skipping embedding of unexpected size", record.Filename)
+			}
+			continue
+		}
+		index.filenames = append(index.filenames, record.Filename)
+		buffer = append(buffer, decodeFP16Buffer(record.Embedding)...)
+		if len(buffer) == cap(buffer) {
+			if err := index.vectors.Add(buffer); err != nil {
+				return index, err
+			}
+			buffer = make([]float32, 0, INDEX_ADD_BATCH*backend.EmbeddingSize)
+		}
+
+		formats := make([]string, 0, 5)
+		if len(record.Thumbnails) > 0 {
+			if err := msgpack.Unmarshal(record.Thumbnails, &formats); err != nil {
+				return index, err
+			}
+		}
+		formatCode := int64(0)
+		for _, formatString := range formats {
+			bit := lo.IndexOf(index.formatNames, formatString)
+			if bit == -1 { // first occurrence of this format: give it the next free bit
+				bit = len(index.formatNames)
+				index.formatNames = append(index.formatNames, formatString)
+			}
+			formatCode |= 1 << bit
+		}
+		index.formatCodes = append(index.formatCodes, formatCode)
+	}
+	if err = rows.Err(); err != nil {
+		return index, err
+	}
+	if len(buffer) > 0 {
+		if err = index.vectors.Add(buffer); err != nil {
+			return index, err
+		}
+	}
+	return index, nil
+}
+
+func decodeFP16Buffer(buf []byte) []float32 {
+	out := make([]float32, len(buf)/2) // little-endian float16 pairs; a trailing odd byte is ignored instead of being read past the end
+	for i := range out {
+		out[i] = float16.Frombits(uint16(buf[2*i]) | uint16(buf[2*i+1])<<8).Float32()
+	}
+	return out
+}
+
+type EmbeddingVector []float32
+
+type QueryResult struct {
+	Matches    [][]interface{}   `json:"matches"`    // rows of [score, filename, filename hash, format bitmask]
+	Formats    []string          `json:"formats"`    // format names; bit i of a row's bitmask refers to Formats[i]
+	Extensions map[string]string `json:"extensions"` // file extension of each thumbnail format, keyed by format name
+}
+
+// this terrible language cannot express tagged unions
+type QueryTerm struct {
+ Embedding *EmbeddingVector `json:"embedding"`
+ Image *string `json:"image"` // base64
+ Text *string `json:"text"`
+ Weight *float32 `json:"weight"`
+}
+
+type QueryRequest struct {
+ Terms []QueryTerm `json:"terms"`
+ K *int `json:"k"`
+}
+
+// queryIndex returns the (at most k) nearest neighbours of query as [score, filename, filename hash, format bitmask] rows.
+func queryIndex(index *Index, query EmbeddingVector, k int) (QueryResult, error) {
+	distances, ids, err := index.vectors.Search(query, int64(k))
+	if err != nil {
+		return QueryResult{}, err
+	}
+	// FAISS pads the result with id -1 when the index holds fewer than k vectors; those entries are not matches.
+	items := lo.FilterMap(lo.Zip2(distances, ids), func(x lo.Tuple2[float32, int64], _ int) ([]interface{}, bool) {
+		if x.B < 0 || x.B >= int64(len(index.filenames)) {
+			return nil, false
+		}
+		filename := index.filenames[x.B]
+		return []interface{}{x.A, filename, generateFilenameHash(filename), index.formatCodes[x.B]}, true
+	})
+
+	return QueryResult{
+		Matches: items,
+		Formats: index.formatNames,
+	}, nil
+}
+
+// handleRequest answers one search query: the weighted sum of all query terms (raw embeddings, texts
+// and base64-encoded images, the latter two embedded by the inference server) is looked up in index
+// and the matches are written to w as JSON. A request without a body is treated as a health check.
+func handleRequest(config Config, backendConfig InferenceServerConfig, index *Index, w http.ResponseWriter, req *http.Request) error {
+	// Server requests always carry a non-nil Body, so an absent body shows up as a content length of zero.
+	if req.Body == nil || req.ContentLength == 0 {
+		io.WriteString(w, "OK") // health check
+		return nil
+	}
+	var qreq QueryRequest
+	if err := json.NewDecoder(req.Body).Decode(&qreq); err != nil {
+		return err
+	}
+
+	totalEmbedding := make(EmbeddingVector, backendConfig.EmbeddingSize)
+	// accumulate adds weight*vector to the query, rejecting vectors that do not match the model's size.
+	accumulate := func(vector []float32, weight float32) error {
+		if len(vector) != len(totalEmbedding) {
+			return fmt.Errorf("embedding has %d dimensions, expected %d", len(vector), len(totalEmbedding))
+		}
+		for i, x := range vector {
+			totalEmbedding[i] += x * weight
+		}
+		return nil
+	}
+	// embed sends one batch to the inference server and accumulates the returned vectors with their weights.
+	embed := func(batch EmbeddingRequest, weights []float32) error {
+		embs, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "/", batch)
+		if err != nil {
+			return err
+		}
+		if len(embs) != len(weights) {
+			return fmt.Errorf("inference server returned %d embeddings for %d inputs", len(embs), len(weights))
+		}
+		for j, emb := range embs {
+			if err := accumulate(decodeFP16Buffer(emb), weights[j]); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	// Images and texts are collected first so that each kind needs only one request to the inference server.
+	imageBatch := make([][]byte, 0)
+	imageWeights := make([]float32, 0)
+	textBatch := make([]string, 0)
+	textWeights := make([]float32, 0)
+
+	for _, term := range qreq.Terms {
+		// A term without an explicit weight counts once.
+		weight := float32(1.0)
+		if term.Weight != nil {
+			weight = *term.Weight
+		}
+		if term.Image != nil {
+			raw, err := base64.StdEncoding.DecodeString(*term.Image)
+			if err != nil {
+				return err
+			}
+			resized, err := bimg.NewImage(raw).Process(bimg.Options{
+				Width:          int(backendConfig.ImageSize[0]),
+				Height:         int(backendConfig.ImageSize[1]),
+				Force:          true,
+				Type:           bimg.PNG,
+				Interpretation: bimg.InterpretationSRGB,
+			})
+			if err != nil {
+				return err
+			}
+			imageBatch = append(imageBatch, resized)
+			imageWeights = append(imageWeights, weight)
+		}
+		if term.Text != nil {
+			textBatch = append(textBatch, *term.Text)
+			textWeights = append(textWeights, weight)
+		}
+		if term.Embedding != nil {
+			if err := accumulate(*term.Embedding, weight); err != nil {
+				return err
+			}
+		}
+	}
+
+	if len(imageBatch) > 0 {
+		if err := embed(EmbeddingRequest{Images: imageBatch}, imageWeights); err != nil {
+			return err
+		}
+	}
+	if len(textBatch) > 0 {
+		if err := embed(EmbeddingRequest{Text: textBatch}, textWeights); err != nil {
+			return err
+		}
+	}
+
+	// k is the number of matches asked for; it defaults to 1000.
+	k := 1000
+	if qreq.K != nil {
+		k = *qreq.K
+	}
+	if k <= 0 {
+		return fmt.Errorf("k must be positive, got %d", k)
+	}
+
+	qres, err := queryIndex(index, totalEmbedding, k)
+	if err != nil {
+		return err
+	}
+	qres.Extensions = make(map[string]string)
+	for name, format := range imageFormats(config) {
+		qres.Extensions[name] = format.extension
+	}
+
+	// Announce JSON only once there is a result to send; error responses are written by the caller.
+	w.Header().Add("Content-Type", "application/json")
+	return json.NewEncoder(w).Encode(qres)
+}
+
+func init() {
+ os.Setenv("VIPS_WARNING", "FALSE") // this does not actually work
+ bimg.VipsCacheSetMax(0)
+ bimg.VipsCacheSetMaxMem(0)
+}
+
+// main loads the JSON config named by the first argument, creates the database schema, waits for the
+// inference server, starts the background ingest loop and serves search queries over HTTP.
+func main() {
+	if len(os.Args) < 2 {
+		log.Fatal("usage: meme-search <config file>")
+	}
+	content, err := os.ReadFile(os.Args[1])
+	if err != nil {
+		log.Fatal("config file unreadable ", err)
+	}
+	var config Config
+	if err = json.Unmarshal(content, &config); err != nil {
+		log.Fatal("config file wrong ", err)
+	}
+	fmt.Println(config)
+
+	db, err := sqlx.Connect("sqlite3", config.DbPath)
+	if err != nil {
+		log.Fatal("DB connection failure ", err)
+	}
+	db.MustExec(schema)
+	db.Close() // only needed for the schema; ingestFiles and buildIndex open their own connections
+
+	var backend InferenceServerConfig
+	for {
+		resp, err := http.Get(config.ClipServer + "/config")
+		if err != nil {
+			log.Println("backend failed (fetch) ", err)
+		} else {
+			backend, err = decodeMsgpackFrom[InferenceServerConfig](resp)
+			resp.Body.Close()
+			if err == nil {
+				break
+			}
+			log.Println("backend failed (parse) ", err)
+		}
+		time.Sleep(time.Second)
+	}
+
+	// Serve from whatever is already in the database until the first ingest has finished.
+	initialIndex, err := buildIndex(config, backend)
+	if err != nil {
+		log.Fatal("index build failed ", err)
+	}
+	// index is replaced by the ingest goroutine while request handlers read it, hence the lock.
+	var indexLock sync.RWMutex
+	index := &initialIndex
+	// Every reload request carries the channel on which the result of the ingest it triggered is reported.
+	requestIngest := make(chan chan<- error)
+
+	go func() {
+		var reply chan<- error // stays nil for the initial ingest, which nobody waits for
+		for {
+			log.Println("ingest running")
+			err := ingestFiles(config, backend)
+			if err != nil {
+				log.Println("ingest failed ", err)
+			} else if newIndex, buildErr := buildIndex(config, backend); buildErr != nil {
+				log.Println("index build failed ", buildErr)
+				err = buildErr
+			} else {
+				indexLock.Lock()
+				index = &newIndex
+				indexLock.Unlock()
+			}
+			if reply != nil {
+				reply <- err
+			}
+			reply = <-requestIngest
+		}
+	}()
+
+	http.HandleFunc("/", func(w http.ResponseWriter, req *http.Request) {
+		w.Header().Add("Access-Control-Allow-Origin", "*")
+		w.Header().Add("Access-Control-Allow-Headers", "Content-Type")
+		if req.Method == "OPTIONS" {
+			w.WriteHeader(204)
+			return
+		}
+		indexLock.RLock()
+		current := index
+		indexLock.RUnlock()
+		if err := handleRequest(config, backend, current, w, req); err != nil {
+			w.Header().Add("Content-Type", "application/json")
+			w.WriteHeader(500)
+			json.NewEncoder(w).Encode(map[string]string{
+				"error": err.Error(),
+			})
+		}
+	})
+	http.HandleFunc("/reload", func(w http.ResponseWriter, req *http.Request) {
+		if req.Method == "POST" {
+			log.Println("requesting index reload")
+			reply := make(chan error, 1) // buffered, so the ingest loop never blocks on reporting back
+			requestIngest <- reply
+			if err := <-reply; err == nil {
+				w.Write([]byte("OK"))
+			} else {
+				w.WriteHeader(500)
+				w.Write([]byte(err.Error()))
+			}
+		}
+	})
+	http.HandleFunc("/profile", func(w http.ResponseWriter, req *http.Request) {
+		f, err := os.Create("mem.pprof")
+		if err != nil {
+			log.Fatal("could not create memory profile: ", err)
+		}
+		defer f.Close()
+		var m runtime.MemStats
+		runtime.ReadMemStats(&m)
+		log.Printf("Memory usage: Alloc=%v, TotalAlloc=%v, Sys=%v", m.Alloc, m.TotalAlloc, m.Sys)
+		log.Println(bimg.VipsMemory())
+		bimg.VipsDebugInfo()
+		runtime.GC() // Trigger garbage collection
+		if err := pprof.WriteHeapProfile(f); err != nil {
+			log.Fatal("could not write memory profile: ", err)
+		}
+	})
+	log.Println("starting server")
+	log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", config.Port), nil))
+}
diff --git a/misc/bad-go-version/ocr.go b/misc/bad-go-version/ocr.go
new file mode 100644
index 0000000..55ca675
--- /dev/null
+++ b/misc/bad-go-version/ocr.go
@@ -0,0 +1,264 @@
+package main
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "math"
+ "mime/multipart"
+ "net/http"
+ "net/textproto"
+ "regexp"
+ "strings"
+ "time"
+
+ "github.com/h2non/bimg"
+ "github.com/samber/lo"
+ "github.com/titanous/json5"
+)
+
+// CALLBACK_REGEX finds the AF_initDataCallback(...) call whose argument object
+// begins with key 'ds:1'; capture group 1 is that object literal, which
+// scanImageChunk parses as JSON5.
+const CALLBACK_REGEX string = ">AF_initDataCallback\\(({key: 'ds:1'.*?)\\);"
+
+// SegmentCoords is a rectangle in integer coordinates, as produced by
+// rationalizeCoordsFormat1 from fractional centre/size values.
+type SegmentCoords struct {
+ x int // left edge (centre x minus half the width)
+ y int // top edge (centre y minus half the height)
+ w int // width
+ h int // height
+}
+
+// Segment pairs one recognised piece of text with the rectangle it was found in.
+// NOTE(review): both fields are unexported, yet ScanResult values are passed to
+// msgpack.Marshal during ingest; reflection-based encoders commonly skip
+// unexported fields - confirm what actually gets stored.
+type Segment struct {
+ coords SegmentCoords
+ text string
+}
+
+// ScanResult is the list of text segments found in an image (or image chunk).
+type ScanResult []Segment
+
+// TODO coordinates are negative sometimes and I think they shouldn't be
+func rationalizeCoordsFormat1(imageW float64, imageH float64, centerXFraction float64, centerYFraction float64, widthFraction float64, heightFraction float64) SegmentCoords {
+ return SegmentCoords{
+ x: int(math.Round((centerXFraction - widthFraction/2) * imageW)),
+ y: int(math.Round((centerYFraction - heightFraction/2) * imageH)),
+ w: int(math.Round(widthFraction * imageW)),
+ h: int(math.Round(heightFraction * imageH)),
+ }
+}
+
+// scanImageChunk uploads one PNG-encoded image chunk to the Google Lens upload
+// endpoint and extracts the recognised text segments with their rectangles.
+//
+// imageWidth and imageHeight are the pixel dimensions of the chunk; they are
+// used to scale the fractional coordinates in the response to pixels.
+//
+// An error is returned when the request cannot be built or sent, when the
+// response does not contain the expected callback, when its payload cannot be
+// parsed, or when the payload reports a failure. A payload that parses but does
+// not have the expected nested layout yields a nil result and a nil error.
+func scanImageChunk(image []byte, imageWidth int, imageHeight int) (ScanResult, error) {
+	var result ScanResult
+	timestamp := time.Now().UnixMicro()
+	var b bytes.Buffer
+	w := multipart.NewWriter(&b)
+	h := make(textproto.MIMEHeader)
+	h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="encoded_image"; filename="ocr%d.png"`, timestamp))
+	h.Set("Content-Type", "image/png")
+	fw, err := w.CreatePart(h)
+	if err != nil {
+		return result, err
+	}
+	if _, err = fw.Write(image); err != nil {
+		return result, err
+	}
+	// Close exactly once, before sending: this writes the terminating boundary.
+	if err = w.Close(); err != nil {
+		return result, err
+	}
+
+	req, err := http.NewRequest("POST", fmt.Sprintf("https://lens.google.com/v3/upload?stcs=%d", timestamp), &b)
+	if err != nil {
+		return result, err
+	}
+	req.Header.Add("User-Agent", "Mozilla/5.0 (Linux; Android 13; RMX3771) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.144 Mobile Safari/537.36")
+	req.AddCookie(&http.Cookie{
+		Name:  "SOCS",
+		Value: "CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg",
+	})
+	req.Header.Set("Content-Type", w.FormDataContentType())
+	// A zero-value client never times out; use the same 10 second limit as the
+	// reference Python implementation so a stalled upload cannot hang a worker.
+	client := http.Client{Timeout: 10 * time.Second}
+	res, err := client.Do(req)
+	if err != nil {
+		return result, err
+	}
+	defer res.Body.Close()
+	body, err := io.ReadAll(res.Body)
+	if err != nil {
+		return result, err
+	}
+	re := regexp.MustCompile(CALLBACK_REGEX)
+	matches := re.FindSubmatch(body)
+	if len(matches) < 2 {
+		return result, fmt.Errorf("invalid API response")
+	}
+	var lensObject map[string]interface{}
+	err = json5.Unmarshal(matches[1], &lensObject)
+	if err != nil {
+		return result, err
+	}
+
+	if _, ok := lensObject["errorHasStatus"]; ok {
+		return result, errors.New("lens failed")
+	}
+
+	// Checked assertion: a payload without a "data" array must become an error
+	// rather than a panic outside the recover below.
+	root, ok := lensObject["data"].([]interface{})
+	if !ok {
+		return result, errors.New("invalid API response: missing data array")
+	}
+
+	var textSegments []string
+	var textRegions []SegmentCoords
+
+	// I don't know why Google did this.
+	// Text segments are in one place and their locations are in another, using a very strange coordinate system.
+	// At least I don't need whatever is contained in the base64 parts (which I assume are protobufs).
+	// TODO: on a few images, this seems to not work for some reason.
+	extract := func() (ok bool) {
+		defer func() {
+			if r := recover(); r != nil {
+				// https://github.com/dimdenGD/chrome-lens-ocr/blob/main/src/core.js#L316 has code for a fallback text segment read mode.
+				// In testing, this proved unnecessary (quirks of the HTTP request? I don't know), and this only happens on textless images.
+				ok = false
+			}
+		}()
+		textSegmentsRaw := root[3].([]interface{})[4].([]interface{})[0].([]interface{})[0].([]interface{})
+		textRegionsRaw := root[2].([]interface{})[3].([]interface{})[0].([]interface{})
+		for _, x := range textRegionsRaw {
+			if strings.HasPrefix(x.([]interface{})[11].(string), "text:") {
+				rawCoords := x.([]interface{})[1].([]interface{})
+				coords := rationalizeCoordsFormat1(float64(imageWidth), float64(imageHeight), rawCoords[0].(float64), rawCoords[1].(float64), rawCoords[2].(float64), rawCoords[3].(float64))
+				textRegions = append(textRegions, coords)
+			}
+		}
+		for _, x := range textSegmentsRaw {
+			textSegments = append(textSegments, x.(string))
+		}
+		return true
+	}
+	if !extract() {
+		// Unexpected layout: report "no text" instead of failing the whole scan.
+		return result, nil
+	}
+
+	return lo.Map(lo.Zip2(textSegments, textRegions), func(x lo.Tuple2[string, SegmentCoords], _ int) Segment {
+		return Segment{
+			text:   x.A,
+			coords: x.B,
+		}
+	}), nil
+}
+
+// MAX_DIM is the largest width, and the largest chunk height, in pixels that
+// scanImage hands to scanImageChunk.
+const MAX_DIM int = 1024
+
+// scanImage runs OCR over a whole image. Images wider than MAX_DIM are scaled
+// down to MAX_DIM wide (height scaled by the same factor), then the image is
+// cut into horizontal strips of at most MAX_DIM rows which are scanned one by
+// one. Segment y coordinates are shifted by the strip's top offset.
+// On failure the segments gathered so far are returned together with the error.
+func scanImage(image *bimg.Image) (ScanResult, error) {
+	segments := ScanResult{}
+	meta, err := image.Metadata()
+	if err != nil {
+		return segments, err
+	}
+	sourceWidth := meta.Size.Width
+	scaledWidth, scaledHeight := sourceWidth, meta.Size.Height
+	if scaledWidth > MAX_DIM {
+		scaledWidth = MAX_DIM
+		scaledHeight = int(math.Round(float64(scaledHeight) * (float64(scaledWidth) / float64(sourceWidth))))
+	}
+	for top := 0; top < scaledHeight; top += MAX_DIM {
+		// the last strip may be shorter than MAX_DIM
+		stripHeight := scaledHeight - top
+		if stripHeight > MAX_DIM {
+			stripHeight = MAX_DIM
+		}
+		options := bimg.Options{
+			Height:     scaledHeight, // these are for overall image dimensions (resize then crop)
+			Width:      scaledWidth,
+			Top:        top,
+			AreaHeight: stripHeight,
+			AreaWidth:  scaledWidth,
+			Crop:       true,
+			Type:       bimg.PNG,
+		}
+		strip, err := image.Process(options)
+		if err != nil {
+			return segments, err
+		}
+		found, err := scanImageChunk(strip, scaledWidth, stripHeight)
+		if err != nil {
+			return segments, err
+		}
+		for _, segment := range found {
+			shifted := segment.coords
+			shifted.y += top
+			segments = append(segments, Segment{coords: shifted, text: segment.text})
+		}
+	}
+
+	return segments, nil
+}
+
+/*
+async def scan_image_chunk(sess, image):
+ # send data to inscrutable undocumented Google service
+ # https://github.com/AuroraWright/owocr/blob/master/owocr/ocr.py#L193
+ async with aiohttp.ClientSession() as sess:
+ data = aiohttp.FormData()
+ data.add_field(
+ "encoded_image",
+ encode_img(image),
+ filename="ocr" + str(timestamp) + ".png",
+ content_type="image/png"
+ )
+ async with sess.post(url, headers=headers, cookies=cookies, data=data, timeout=10) as res:
+ body = await res.text()
+
+ # I really worry about Google sometimes. This is not a sensible format.
+ match = CALLBACK_REGEX.search(body)
+ if match == None:
+ raise ValueError("Invalid callback")
+
+ lens_object = pyjson5.loads(match.group(1))
+ if "errorHasStatus" in lens_object:
+ raise RuntimeError("Lens failed")
+
+ text_segments = []
+ text_regions = []
+
+ root = lens_object["data"]
+
+ # I don't know why Google did this.
+ # Text segments are in one place and their locations are in another, using a very strange coordinate system.
+ # At least I don't need whatever is contained in the base64 parts (which I assume are protobufs).
+ # TODO: on a few images, this seems to not work for some reason.
+ try:
+ text_segments = root[3][4][0][0]
+ text_regions = [ rationalize_coords_format1(image.width, image.height, *x[1]) for x in root[2][3][0] if x[11].startswith("text:") ]
+ except (KeyError, IndexError):
+ # https://github.com/dimdenGD/chrome-lens-ocr/blob/main/src/core.js#L316 has code for a fallback text segment read mode.
+ # In testing, this proved unnecessary (quirks of the HTTP request? I don't know), and this only happens on textless images.
+ return [], []
+
+ return text_segments, text_regions
+
+MAX_SCAN_DIM = 1000 # not actually true but close enough
+def chunk_image(image: Image):
+ chunks = []
+ # Cut image down in X axis (I'm assuming images aren't too wide to scan in downscaled form because merging text horizontally would be annoying)
+ if image.width > MAX_SCAN_DIM:
+ image = image.resize((MAX_SCAN_DIM, round(image.height * (image.width / MAX_SCAN_DIM))), Image.LANCZOS)
+ for y in range(0, image.height, MAX_SCAN_DIM):
+ chunks.append(image.crop((0, y, image.width, min(y + MAX_SCAN_DIM, image.height))))
+ return chunks
+
+async def scan_chunks(sess: aiohttp.ClientSession, chunks: [Image]):
+ # If text happens to be split across the cut line it won't get read.
+ # This is because doing overlap read areas would be really annoying.
+ text = ""
+ regions = []
+ for chunk in chunks:
+ new_segments, new_regions = await scan_image_chunk(sess, chunk)
+ for segment in new_segments:
+ text += segment + "\n"
+ for i, (segment, region) in enumerate(zip(new_segments, new_regions)):
+ regions.append({ **region, "y": region["y"] + (MAX_SCAN_DIM * i), "text": segment })
+ return text, regions
+
+async def scan_image(sess: aiohttp.ClientSession, image: Image):
+ return await scan_chunks(sess, chunk_image(image))
+
+if __name__ == "__main__":
+ async def main():
+ async with aiohttp.ClientSession() as sess:
+ print(await scan_image(sess, Image.open("/data/public/memes-or-something/linear-algebra-chess.png")))
+ asyncio.run(main())
+*/
diff --git a/misc/bad-go-version/problematic_thing.go b/misc/bad-go-version/problematic_thing.go
new file mode 100644
index 0000000..487be9c
--- /dev/null
+++ b/misc/bad-go-version/problematic_thing.go
@@ -0,0 +1,891 @@
+package main
+
+import (
+ "bytes"
+ "encoding/base64"
+ "encoding/json"
+ "fmt"
+ "hash/fnv"
+ "io"
+ "log"
+ "net/http"
+ "os"
+ "path/filepath"
+ "runtime"
+ "runtime/pprof"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/DataIntelligenceCrew/go-faiss"
+ "github.com/davidbyttow/govips/v2/vips"
+ "github.com/h2non/bimg"
+ "github.com/jmoiron/sqlx"
+ _ "github.com/mattn/go-sqlite3"
+ "github.com/samber/lo"
+ "github.com/vmihailenco/msgpack"
+ "github.com/x448/float16"
+ "golang.org/x/sync/errgroup"
+)
+
+// Config is the JSON configuration file whose path is given as the first
+// command-line argument.
+type Config struct {
+	ClipServer string `json:"clip_server"` // base URL of the embedding backend
+	DbPath     string `json:"db_path"`     // SQLite database file
+	// Port is the TCP port the HTTP server listens on. It is unsigned 16-bit so
+	// the whole valid port range (up to 65535) can be decoded; a signed 16-bit
+	// field makes encoding/json reject any port above 32767.
+	Port             uint16 `json:"port"`
+	Files            string `json:"files"`         // root directory of the files to index
+	EnableOCR        bool   `json:"enable_ocr"`    // run OCR during ingest
+	ThumbsPath       string `json:"thumbs_path"`   // directory thumbnails are written to
+	EnableThumbnails bool   `json:"enable_thumbs"` // generate thumbnails during ingest
+}
+
+// Index is the in-memory search structure produced by buildIndex.
+// filenames and formatCodes are appended to together with each vector added,
+// so the id FAISS returns for a vector is also the position in both slices.
+type Index struct {
+ vectors *faiss.IndexImpl // FAISS index holding the embedding vectors
+ filenames []string // filename of each indexed vector
+ formatCodes []int64 // per-file bitmask: bit i set means formatNames[i] is listed in that file's thumbnails
+ formatNames []string // thumbnail format names, in order of first appearance while building
+}
+
+// schema creates the files table plus an FTS5 index over the OCR text which is
+// kept in sync with files by triggers. Every statement is idempotent
+// (IF NOT EXISTS), so objects already present in an existing database are left
+// untouched.
+//
+// ocr_fts is an external-content table, so content= must name the table that
+// holds the text (files). The update trigger needs its own name: with
+// IF NOT EXISTS a second trigger reusing the delete trigger's name is silently
+// never created. It must also insert into the ocr column, the only text column
+// ocr_fts has besides filename.
+// NOTE(review): files has no INTEGER PRIMARY KEY, so the rowids the triggers
+// rely on are implicit; confirm they stay stable for this database's lifecycle.
+var schema = `
+CREATE TABLE IF NOT EXISTS files (
+    filename TEXT PRIMARY KEY,
+    embedding_time INTEGER,
+    ocr_time INTEGER,
+    thumbnail_time INTEGER,
+    embedding BLOB,
+    ocr TEXT,
+    raw_ocr_segments BLOB,
+    thumbnails BLOB
+);
+
+CREATE VIRTUAL TABLE IF NOT EXISTS ocr_fts USING fts5 (
+    filename,
+    ocr,
+    tokenize='unicode61 remove_diacritics 2',
+    content='files'
+);
+
+CREATE TRIGGER IF NOT EXISTS ocr_fts_ins AFTER INSERT ON files BEGIN
+    INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
+END;
+
+CREATE TRIGGER IF NOT EXISTS ocr_fts_del AFTER DELETE ON files BEGIN
+    INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
+END;
+
+CREATE TRIGGER IF NOT EXISTS ocr_fts_upd AFTER UPDATE ON files BEGIN
+    INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
+    INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
+END;
+`
+
+// FileRecord mirrors one row of the files table (see schema); the db tags name
+// the columns the exported fields are scanned from.
+type FileRecord struct {
+ Filename string `db:"filename"` // path with the config.Files prefix trimmed; primary key
+ EmbedTime int64 `db:"embedding_time"` // UnixMicro timestamp written together with the embedding
+ OcrTime int64 `db:"ocr_time"` // UnixMicro timestamp written together with the OCR result
+ ThumbnailTime int64 `db:"thumbnail_time"` // UnixMicro timestamp written together with the thumbnail list
+ Embedding []byte `db:"embedding"` // embedding as returned by the backend; decoded as little-endian float16 pairs
+ Ocr string `db:"ocr"` // OCR segment texts, one per line
+ RawOcrSegments []byte `db:"raw_ocr_segments"` // msgpack-encoded scan result
+ Thumbnails []byte `db:"thumbnails"` // msgpack-encoded list of generated thumbnail format names
+ // filesize has no db tag and is not a column; ingestFiles fills it from the
+ // file's stat result and uses it as the size a thumbnail has to beat.
+ filesize int64
+}
+
+// InferenceServerConfig is the msgpack document served by the embedding backend
+// at <clip_server>/config; main fetches it once at startup.
+type InferenceServerConfig struct {
+ BatchSize uint `msgpack:"batch"` // number of images sent to the backend per embedding request
+ ImageSize []uint `msgpack:"image_size"` // element 0 is used as the target width, element 1 as the target height
+ EmbeddingSize uint `msgpack:"embedding_size"` // number of dimensions of an embedding vector
+}
+
+// decodeMsgpackFrom reads the whole response body and decodes it as msgpack
+// into a value of type O. The body is not closed here; that is left to the
+// caller. On a read failure the zero value of O is returned with the error.
+func decodeMsgpackFrom[O interface{}](resp *http.Response) (O, error) {
+	var decoded O
+	payload, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		return decoded, readErr
+	}
+	decodeErr := msgpack.Unmarshal(payload, &decoded)
+	return decoded, decodeErr
+}
+
+// queryClipServer msgpack-encodes data, POSTs it to config.ClipServer+path with
+// content type application/msgpack and decodes the msgpack reply into O.
+// The zero value of O is returned alongside any encoding or transport error.
+func queryClipServer[I interface{}, O interface{}](config Config, path string, data I) (O, error) {
+	var empty O
+	encoded, encodeErr := msgpack.Marshal(data)
+	if encodeErr != nil {
+		return empty, encodeErr
+	}
+	endpoint := config.ClipServer + path
+	response, postErr := http.Post(endpoint, "application/msgpack", bytes.NewReader(encoded))
+	if postErr != nil {
+		return empty, postErr
+	}
+	// the decoder reads the body to the end; release the connection afterwards
+	defer response.Body.Close()
+	return decodeMsgpackFrom[O](response)
+}
+
+// LoadedImage is a decoded image handed from the loader goroutines to the
+// thumbnail and OCR workers in ingestFiles.
+type LoadedImage struct {
+ image *vips.ImageRef
+ filename string
+ originalSize int // size of the source file in bytes; only set for thumbnail jobs
+}
+
+// EmbeddingInput is one resized, PNG-encoded image waiting to be embedded.
+type EmbeddingInput struct {
+ image []byte
+ filename string
+}
+
+// EmbeddingRequest is the msgpack body sent to the embedding backend; callers
+// in this file fill either Images or Text, not both.
+type EmbeddingRequest struct {
+ Images [][]byte `msgpack:"images"`
+ Text []string `msgpack:"text"`
+}
+
+// EmbeddingResponse is the backend's reply: one encoded embedding per input.
+type EmbeddingResponse = [][]byte
+
+// timestamp returns the current time as microseconds since the Unix epoch,
+// the unit used for the *_time columns.
+func timestamp() int64 {
+	now := time.Now()
+	return now.UnixMicro()
+}
+
+// ImageFormatConfig describes one thumbnail variant generated during ingest.
+type ImageFormatConfig struct {
+ targetWidth int // width in pixels the image is scaled to
+ targetFilesize int // when non-zero, the export quality is searched for instead of using quality
+ quality int // export quality, used when targetFilesize is zero
+ format vips.ImageType // encoder used for the export
+ extension string // file extension of the written thumbnail, without the dot
+}
+
+// generateFilenameHash derives a short, URL-safe identifier from a filename:
+// the 128-bit FNV hash of its bytes, encoded as unpadded URL-safe base64.
+func generateFilenameHash(filename string) string {
+	digest := fnv.New128()
+	io.WriteString(digest, filename)
+	return base64.RawURLEncoding.EncodeToString(digest.Sum(nil))
+}
+
+// generateThumbnailFilename builds the on-disk name of a thumbnail:
+// <hash of the original filename><format name>.<format extension>.
+func generateThumbnailFilename(filename string, formatName string, formatConfig ImageFormatConfig) string {
+	stem := generateFilenameHash(filename) + formatName
+	return stem + "." + formatConfig.extension
+}
+
+// initializeDatabase opens the SQLite database at config.DbPath and sets a
+// 2 second busy timeout and WAL journalling. The caller owns the returned
+// handle and must Close it. When the setup statement fails the handle is
+// closed here, so a failed call does not leak an open database.
+// NOTE(review): the PRAGMAs run on whichever pooled connection serves this
+// Exec; confirm whether other connections of the pool also need busy_timeout.
+func initializeDatabase(config Config) (*sqlx.DB, error) {
+	db, err := sqlx.Connect("sqlite3", config.DbPath)
+	if err != nil {
+		return nil, err
+	}
+	_, err = db.Exec("PRAGMA busy_timeout = 2000; PRAGMA journal_mode = WAL")
+	if err != nil {
+		db.Close()
+		return nil, err
+	}
+	return db, nil
+}
+
+// imageFormats returns the thumbnail variants to generate, keyed by format
+// name. The config argument is currently not consulted.
+func imageFormats(config Config) map[string]ImageFormatConfig {
+	// JPEG variant; filesize == 0 means "use the fixed quality"
+	jpeg := func(width int, quality int, filesize int) ImageFormatConfig {
+		return ImageFormatConfig{
+			targetWidth:    width,
+			targetFilesize: filesize,
+			quality:        quality,
+			format:         vips.ImageTypeJPEG,
+			extension:      "jpg",
+		}
+	}
+	// AVIF variant with a fixed quality
+	avif := func(width int, quality int) ImageFormatConfig {
+		return ImageFormatConfig{
+			targetWidth: width,
+			quality:     quality,
+			format:      vips.ImageTypeAVIF,
+			extension:   "avif",
+		}
+	}
+
+	formats := make(map[string]ImageFormatConfig, 5)
+	formats["jpegl"] = jpeg(800, 70, 0)
+	formats["jpegh"] = jpeg(1600, 80, 0)
+	formats["jpeg256kb"] = jpeg(500, 0, 256000)
+	formats["avifh"] = avif(1600, 80)
+	formats["avifl"] = avif(800, 30)
+	return formats
+}
+
+// ingestFiles brings the database in line with the files under config.Files.
+//
+// It walks the directory and queues every file that is new or whose
+// modification time is later than one of its stored timestamps. Queued files
+// are loaded by a pool of loader goroutines and fanned out to the embedding,
+// thumbnail (if enabled) and OCR (if enabled) workers, each of which writes its
+// result to the database. Finally, rows whose file is no longer on disk are
+// deleted.
+//
+// The first error of the walk, the loaders or the workers is returned, in that
+// order of precedence. The pipeline is always shut down and waited for before
+// returning so that no goroutine keeps using the database after it is closed.
+// NOTE(review): a worker that returns early stops draining its channel; if all
+// consumers of a channel exit, the producers feeding it can block. Proper
+// cancellation would need a shared context.
+func ingestFiles(config Config, backend InferenceServerConfig) error {
+	var wg errgroup.Group
+	var iwg errgroup.Group
+
+	// We assume everything is either a modern browser (low-DPI or high-DPI), an ancient browser or a ComputerCraft machine abusing Extra Utilities 2 screens.
+	var formats = imageFormats(config)
+
+	db, err := initializeDatabase(config)
+	if err != nil {
+		return err
+	}
+	defer db.Close()
+
+	toProcess := make(chan FileRecord, 100)
+	toEmbed := make(chan EmbeddingInput, backend.BatchSize)
+	toThumbnail := make(chan LoadedImage, 30)
+	toOCR := make(chan LoadedImage, 30)
+	embedBatches := make(chan []EmbeddingInput, 1)
+
+	// image loading and preliminary resizing
+	for range runtime.NumCPU() {
+		iwg.Go(func() error {
+			for record := range toProcess {
+				path := filepath.Join(config.Files, record.Filename)
+				img, err := vips.LoadImageFromFile(path, &vips.ImportParams{})
+				if err != nil {
+					log.Println("could not read", record.Filename)
+					continue
+				}
+				if record.Embedding == nil {
+					i, err := img.Copy() // TODO this is ugly, we should not need to do in-place operations
+					if err != nil {
+						return err
+					}
+					err = i.ResizeWithVScale(float64(backend.ImageSize[0])/float64(i.Width()), float64(backend.ImageSize[1])/float64(i.Height()), vips.KernelLanczos3)
+					if err != nil {
+						return err
+					}
+					resized, _, err := i.ExportPng(vips.NewPngExportParams())
+					if err != nil {
+						log.Println("resize failure", record.Filename, err)
+					} else {
+						toEmbed <- EmbeddingInput{
+							image:    resized,
+							filename: record.Filename,
+						}
+					}
+				}
+				if record.Thumbnails == nil && config.EnableThumbnails {
+					toThumbnail <- LoadedImage{
+						image:        img,
+						filename:     record.Filename,
+						originalSize: int(record.filesize),
+					}
+				}
+				if record.RawOcrSegments == nil && config.EnableOCR {
+					toOCR <- LoadedImage{
+						image:    img,
+						filename: record.Filename,
+					}
+				}
+			}
+			return nil
+		})
+	}
+
+	if config.EnableThumbnails {
+		for range runtime.NumCPU() {
+			wg.Go(func() error {
+				for image := range toThumbnail {
+					generatedFormats := make([]string, 0)
+					for formatName, formatConfig := range formats {
+						// err must be the single variable checked after the export so
+						// that resize/export failures are not lost to shadowing.
+						var err error
+						var resized []byte
+						if formatConfig.targetFilesize != 0 {
+							// binary search for the quality at the target file size
+							lb := 1
+							ub := 100
+							for {
+								quality := (lb + ub) / 2
+								i, copyErr := image.image.Copy()
+								if copyErr != nil {
+									return copyErr
+								}
+								if err = i.Resize(float64(formatConfig.targetWidth)/float64(i.Width()), vips.KernelLanczos3); err != nil {
+									break
+								}
+								resized, _, err = i.Export(&vips.ExportParams{
+									Format:        formatConfig.format,
+									Speed:         4,
+									Quality:       quality,
+									StripMetadata: true,
+								})
+								if err != nil {
+									break
+								}
+								if len(resized) > formatConfig.targetFilesize {
+									ub = quality
+								} else {
+									lb = quality + 1
+								}
+								if lb >= ub {
+									break
+								}
+							}
+						} else {
+							i, copyErr := image.image.Copy()
+							if copyErr != nil {
+								return copyErr
+							}
+							if err = i.Resize(float64(formatConfig.targetWidth)/float64(i.Width()), vips.KernelLanczos3); err == nil {
+								resized, _, err = i.Export(&vips.ExportParams{
+									Format:        formatConfig.format,
+									Speed:         4,
+									Quality:       formatConfig.quality,
+									StripMetadata: true,
+								})
+							}
+						}
+						if err != nil {
+							log.Println("thumbnailing failure", image.filename, err)
+							continue
+						}
+						// only keep thumbnails that are actually smaller than the original
+						if len(resized) < image.originalSize {
+							generatedFormats = append(generatedFormats, formatName)
+							err = bimg.Write(filepath.Join(config.ThumbsPath, generateThumbnailFilename(image.filename, formatName, formatConfig)), resized)
+							if err != nil {
+								return err
+							}
+						}
+					}
+					formatsData, err := msgpack.Marshal(generatedFormats)
+					if err != nil {
+						return err
+					}
+					_, err = db.Exec("UPDATE files SET thumbnails = ?, thumbnail_time = ? WHERE filename = ?", formatsData, timestamp(), image.filename)
+					if err != nil {
+						return err
+					}
+				}
+				return nil
+			})
+		}
+	}
+
+	if config.EnableOCR {
+		for range 100 {
+			wg.Go(func() error {
+				for image := range toOCR {
+					scan, err := scanImage(image.image)
+					if err != nil {
+						log.Println("OCR failure", image.filename, err)
+						continue
+					}
+					ocrText := ""
+					for _, segment := range scan {
+						ocrText += segment.text
+						ocrText += "\n"
+					}
+					ocrData, err := msgpack.Marshal(scan)
+					if err != nil {
+						return err
+					}
+					_, err = db.Exec("UPDATE files SET ocr = ?, raw_ocr_segments = ?, ocr_time = ? WHERE filename = ?", ocrText, ocrData, timestamp(), image.filename)
+					if err != nil {
+						return err
+					}
+				}
+				return nil
+			})
+		}
+	}
+
+	// group single images into batches of the size the backend asked for
+	wg.Go(func() error {
+		buffer := make([]EmbeddingInput, 0, backend.BatchSize)
+		for input := range toEmbed {
+			buffer = append(buffer, input)
+			if len(buffer) == int(backend.BatchSize) {
+				embedBatches <- buffer
+				buffer = make([]EmbeddingInput, 0, backend.BatchSize)
+			}
+		}
+		if len(buffer) > 0 {
+			embedBatches <- buffer
+		}
+		close(embedBatches)
+		return nil
+	})
+
+	for range 3 {
+		wg.Go(func() error {
+			for batch := range embedBatches {
+				result, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "", EmbeddingRequest{
+					Images: lo.Map(batch, func(item EmbeddingInput, _ int) []byte { return item.image }),
+				})
+				if err != nil {
+					return err
+				}
+
+				tx, err := db.Begin()
+				if err != nil {
+					return err
+				}
+				for i, vector := range result {
+					_, err = tx.Exec("UPDATE files SET embedding_time = ?, embedding = ? WHERE filename = ?", timestamp(), vector, batch[i].filename)
+					if err != nil {
+						tx.Rollback()
+						return err
+					}
+				}
+				err = tx.Commit()
+				if err != nil {
+					return err
+				}
+			}
+			return nil
+		})
+	}
+
+	filenamesOnDisk := make(map[string]struct{})
+
+	walkErr := filepath.WalkDir(config.Files, func(path string, d os.DirEntry, err error) error {
+		filename := strings.TrimPrefix(path, config.Files)
+		if err != nil {
+			return err
+		}
+		if d.IsDir() {
+			return nil
+		}
+		filenamesOnDisk[filename] = struct{}{}
+		records := []FileRecord{}
+		err = db.Select(&records, "SELECT * FROM files WHERE filename = ?", filename)
+		if err != nil {
+			return err
+		}
+		stat, err := d.Info()
+		if err != nil {
+			return err
+		}
+		modtime := stat.ModTime().UnixMicro()
+		if len(records) == 0 || modtime > records[0].EmbedTime || modtime > records[0].OcrTime || modtime > records[0].ThumbnailTime {
+			_, err = db.Exec("INSERT OR IGNORE INTO files VALUES (?, 0, 0, 0, '', '', '', '')", filename)
+			if err != nil {
+				return err
+			}
+			record := FileRecord{Filename: filename}
+			if len(records) > 0 {
+				record = records[0]
+			}
+			// filesize is not stored in the database, so it has to be set after
+			// an existing row has been copied in, not before.
+			record.filesize = stat.Size()
+			if modtime > record.EmbedTime || len(record.Embedding) == 0 {
+				record.Embedding = nil
+			}
+			if modtime > record.OcrTime || len(record.RawOcrSegments) == 0 {
+				record.RawOcrSegments = nil
+			}
+			if modtime > record.ThumbnailTime || len(record.Thumbnails) == 0 {
+				record.Thumbnails = nil
+			}
+			toProcess <- record
+		}
+		return nil
+	})
+
+	// Shut the pipeline down stage by stage, even if the walk failed: loaders
+	// first, then every channel they feed (including toOCR, whose workers would
+	// otherwise never finish), then the workers.
+	close(toProcess)
+	loadErr := iwg.Wait()
+	close(toEmbed)
+	close(toThumbnail)
+	close(toOCR)
+	workErr := wg.Wait()
+	if walkErr != nil {
+		return walkErr
+	}
+	if loadErr != nil {
+		return loadErr
+	}
+	if workErr != nil {
+		return workErr
+	}
+
+	// Collect the rows whose file has disappeared, then delete them in one
+	// transaction once the read cursor is closed.
+	rows, err := db.Queryx("SELECT filename FROM files")
+	if err != nil {
+		return err
+	}
+	defer rows.Close()
+	stale := make([]string, 0)
+	for rows.Next() {
+		var filename string
+		err := rows.Scan(&filename)
+		if err != nil {
+			return err
+		}
+		if _, ok := filenamesOnDisk[filename]; !ok {
+			stale = append(stale, filename)
+		}
+	}
+	if err = rows.Err(); err != nil {
+		return err
+	}
+	rows.Close()
+
+	tx, err := db.Begin()
+	if err != nil {
+		return err
+	}
+	for _, filename := range stale {
+		_, err = tx.Exec("DELETE FROM files WHERE filename = ?", filename)
+		if err != nil {
+			tx.Rollback()
+			return err
+		}
+	}
+	return tx.Commit()
+}
+
+// INDEX_ADD_BATCH is the number of vectors collected before they are handed to
+// the FAISS index in a single Add call.
+const INDEX_ADD_BATCH = 512
+
+// buildIndex reads every row of the files table and builds a fresh in-memory
+// Index from the rows that have an embedding: the embedding is decoded from
+// little-endian float16 and added to a FAISS "SQfp16" inner-product index, and
+// the filename and thumbnail-format bitmask are recorded at the same position.
+//
+// Rows whose embedding is not exactly 2*backend.EmbeddingSize bytes long are
+// logged and skipped: decoding them would either run past the end of the data
+// or shift every later vector in the batch buffer.
+//
+// Any database, decoding or FAISS error aborts the build; the partially filled
+// index is returned together with the error.
+func buildIndex(config Config, backend InferenceServerConfig) (Index, error) {
+	var index Index
+
+	db, err := initializeDatabase(config)
+	if err != nil {
+		return index, err
+	}
+	defer db.Close()
+
+	newFAISSIndex, err := faiss.IndexFactory(int(backend.EmbeddingSize), "SQfp16", faiss.MetricInnerProduct)
+	if err != nil {
+		return index, err
+	}
+	index.vectors = newFAISSIndex
+
+	var count int
+	err = db.Get(&count, "SELECT COUNT(*) FROM files")
+	if err != nil {
+		return index, err
+	}
+
+	index.filenames = make([]string, 0, count)
+	index.formatCodes = make([]int64, 0, count)
+	buffer := make([]float32, 0, INDEX_ADD_BATCH*backend.EmbeddingSize)
+	index.formatNames = make([]string, 0, 5)
+
+	// two bytes per float16 component
+	embeddingBytes := 2 * int(backend.EmbeddingSize)
+
+	rows, err := db.Queryx("SELECT * FROM files")
+	if err != nil {
+		return index, err
+	}
+	defer rows.Close()
+	for rows.Next() {
+		var record FileRecord
+		err := rows.StructScan(&record)
+		if err != nil {
+			return index, err
+		}
+		if len(record.Embedding) == 0 {
+			continue
+		}
+		if len(record.Embedding) != embeddingBytes {
+			log.Println("skipping embedding of unexpected size", record.Filename, len(record.Embedding))
+			continue
+		}
+
+		index.filenames = append(index.filenames, record.Filename)
+		for i := 0; i < len(record.Embedding); i += 2 {
+			buffer = append(buffer, float16.Frombits(uint16(record.Embedding[i])+uint16(record.Embedding[i+1])<<8).Float32())
+		}
+		if len(buffer) == cap(buffer) {
+			if err := index.vectors.Add(buffer); err != nil {
+				return index, err
+			}
+			buffer = make([]float32, 0, INDEX_ADD_BATCH*backend.EmbeddingSize)
+		}
+
+		formats := make([]string, 0, 5)
+		if len(record.Thumbnails) > 0 {
+			err := msgpack.Unmarshal(record.Thumbnails, &formats)
+			if err != nil {
+				return index, err
+			}
+		}
+
+		// Encode the format list as a bitmask over index.formatNames, assigning
+		// a new bit the first time a format name is seen.
+		formatCode := int64(0)
+		for _, formatString := range formats {
+			found := false
+			for i, name := range index.formatNames {
+				if name == formatString {
+					formatCode |= 1 << i
+					found = true
+					break
+				}
+			}
+			if !found {
+				newIndex := len(index.formatNames)
+				formatCode |= 1 << newIndex
+				index.formatNames = append(index.formatNames, formatString)
+			}
+		}
+		index.formatCodes = append(index.formatCodes, formatCode)
+	}
+	if err := rows.Err(); err != nil {
+		return index, err
+	}
+	// flush the final, partially filled batch
+	if len(buffer) > 0 {
+		if err := index.vectors.Add(buffer); err != nil {
+			return index, err
+		}
+	}
+
+	return index, nil
+}
+
+// decodeFP16Buffer interprets buf as consecutive little-endian float16 values
+// and widens each of them to float32. buf is expected to have an even length.
+func decodeFP16Buffer(buf []byte) []float32 {
+	decoded := make([]float32, 0, len(buf)/2)
+	for offset := 0; offset < len(buf); offset += 2 {
+		low := uint16(buf[offset])
+		high := uint16(buf[offset+1])
+		half := float16.Frombits(low | high<<8)
+		decoded = append(decoded, half.Float32())
+	}
+	return decoded
+}
+
+// EmbeddingVector is an embedding as float32 components.
+type EmbeddingVector []float32
+
+// QueryResult is the JSON body returned for a search.
+type QueryResult struct {
+ // Matches holds one row per hit: [score, filename, filename hash, format bitmask].
+ Matches [][]interface{} `json:"matches"`
+ // Formats lists the format names the bits of the format bitmask refer to.
+ Formats []string `json:"formats"`
+ // Extensions maps each known format name to its file extension.
+ Extensions map[string]string `json:"extensions"`
+}
+
+// QueryTerm is one weighted component of a query. Each of Embedding, Image and
+// Text that is present contributes to the query vector; Weight applies to all
+// of them and is treated as 1 when absent.
+// this terrible language cannot express tagged unions
+type QueryTerm struct {
+ Embedding *EmbeddingVector `json:"embedding"`
+ Image *string `json:"image"` // base64
+ Text *string `json:"text"`
+ Weight *float32 `json:"weight"`
+}
+
+// QueryRequest is the JSON body of a search request.
+type QueryRequest struct {
+ Terms []QueryTerm `json:"terms"`
+ K *int `json:"k"` // number of results wanted; 1000 when absent
+}
+
+// queryIndex returns the k nearest entries of index for the given query vector.
+// Each match is a row of [score, filename, filename hash, format bitmask].
+//
+// FAISS pads its result with label -1 when the index holds fewer than k
+// vectors; such labels (and any other label outside the filename table) are
+// dropped instead of being used as a slice index, so fewer than k matches may
+// be returned.
+func queryIndex(index *Index, query EmbeddingVector, k int) (QueryResult, error) {
+	var qr QueryResult
+	distances, ids, err := index.vectors.Search(query, int64(k))
+	if err != nil {
+		return qr, err
+	}
+	items := make([][]interface{}, 0, len(ids))
+	for i, id := range ids {
+		if id < 0 || id >= int64(len(index.filenames)) || i >= len(distances) {
+			continue
+		}
+		filename := index.filenames[id]
+		items = append(items, []interface{}{
+			distances[i],
+			filename,
+			generateFilenameHash(filename),
+			index.formatCodes[id],
+		})
+	}
+
+	return QueryResult{
+		Matches: items,
+		Formats: index.formatNames,
+	}, nil
+}
+
+// handleRequest serves one search request.
+//
+// A request without a body (or with an empty one) is treated as a health check
+// and answered with "OK". Otherwise the body is decoded as a QueryRequest; the
+// query vector is the weighted sum of every term's embedding, where image and
+// text terms are embedded by the backend first. The k nearest index entries
+// are written to w as JSON.
+//
+// Nothing is written to w when an error is returned, so the caller is free to
+// send its own error response. Malformed input (bad base64, an embedding of
+// the wrong length, a non-positive k) and malformed backend replies are
+// reported as errors rather than causing a panic.
+func handleRequest(config Config, backendConfig InferenceServerConfig, index *Index, w http.ResponseWriter, req *http.Request) error {
+	if req.Body == nil {
+		io.WriteString(w, "OK") // health check
+		return nil
+	}
+	var qreq QueryRequest
+	err := json.NewDecoder(req.Body).Decode(&qreq)
+	if err == io.EOF {
+		// On the server side Body is never nil; a request without a body shows
+		// up as an immediate EOF instead. Treat it as the health check.
+		io.WriteString(w, "OK")
+		return nil
+	}
+	if err != nil {
+		return err
+	}
+
+	embeddingSize := int(backendConfig.EmbeddingSize)
+	totalEmbedding := make(EmbeddingVector, embeddingSize)
+
+	// addWeighted accumulates weight*vector into the query vector.
+	addWeighted := func(vector []float32, weight float32) error {
+		if len(vector) != embeddingSize {
+			return fmt.Errorf("embedding has %d dimensions, expected %d", len(vector), embeddingSize)
+		}
+		for i, component := range vector {
+			totalEmbedding[i] += component * weight
+		}
+		return nil
+	}
+	// addEncoded accumulates backend embeddings (float16 bytes), pairing each
+	// with the weight of the input it was computed from.
+	addEncoded := func(embs [][]byte, weights []float32) error {
+		if len(embs) != len(weights) {
+			return fmt.Errorf("backend returned %d embeddings for %d inputs", len(embs), len(weights))
+		}
+		for j, emb := range embs {
+			if len(emb) != 2*embeddingSize {
+				return fmt.Errorf("backend embedding is %d bytes, expected %d", len(emb), 2*embeddingSize)
+			}
+			if err := addWeighted(decodeFP16Buffer(emb), weights[j]); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	imageBatch := make([][]byte, 0)
+	imageWeights := make([]float32, 0)
+	textBatch := make([]string, 0)
+	textWeights := make([]float32, 0)
+
+	for _, term := range qreq.Terms {
+		weight := float32(1.0)
+		if term.Weight != nil {
+			weight = *term.Weight
+		}
+		if term.Image != nil {
+			raw, err := base64.StdEncoding.DecodeString(*term.Image)
+			if err != nil {
+				return err
+			}
+			loaded := bimg.NewImage(raw)
+			resized, err := loaded.Process(bimg.Options{
+				Width:          int(backendConfig.ImageSize[0]),
+				Height:         int(backendConfig.ImageSize[1]),
+				Force:          true,
+				Type:           bimg.PNG,
+				Interpretation: bimg.InterpretationSRGB,
+			})
+			if err != nil {
+				return err
+			}
+			imageBatch = append(imageBatch, resized)
+			imageWeights = append(imageWeights, weight)
+		}
+		if term.Text != nil {
+			textBatch = append(textBatch, *term.Text)
+			textWeights = append(textWeights, weight)
+		}
+		if term.Embedding != nil {
+			if err := addWeighted(*term.Embedding, weight); err != nil {
+				return err
+			}
+		}
+	}
+
+	if len(imageBatch) > 0 {
+		embs, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "/", EmbeddingRequest{
+			Images: imageBatch,
+		})
+		if err != nil {
+			return err
+		}
+		if err := addEncoded(embs, imageWeights); err != nil {
+			return err
+		}
+	}
+	if len(textBatch) > 0 {
+		embs, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "/", EmbeddingRequest{
+			Text: textBatch,
+		})
+		if err != nil {
+			return err
+		}
+		if err := addEncoded(embs, textWeights); err != nil {
+			return err
+		}
+	}
+
+	k := 1000
+	if qreq.K != nil {
+		k = *qreq.K
+	}
+	if k <= 0 {
+		return fmt.Errorf("k must be positive, got %d", k)
+	}
+
+	qres, err := queryIndex(index, totalEmbedding, k)
+	if err != nil {
+		return err
+	}
+
+	qres.Extensions = make(map[string]string)
+	for name, format := range imageFormats(config) {
+		qres.Extensions[name] = format.extension
+	}
+
+	// Only announce JSON once there is a result to send; on error the caller
+	// sets its own headers.
+	w.Header().Add("Content-Type", "application/json")
+	return json.NewEncoder(w).Encode(qres)
+}
+
+// init runs before main: it tries to silence libvips warnings and sets both
+// bimg vips cache limits to zero.
+func init() {
+	// this does not actually work
+	os.Setenv("VIPS_WARNING", "FALSE")
+
+	bimg.VipsCacheSetMaxMem(0)
+	bimg.VipsCacheSetMax(0)
+}
+
+// main loads the JSON config named by the first command-line argument, makes
+// sure the database schema exists, waits until the embedding backend answers
+// its /config endpoint, starts the background ingest loop and serves HTTP:
+//
+//	/         search queries (see handleRequest), with permissive CORS headers
+//	/reload   POST: request another ingest + index rebuild and report its outcome
+//	/profile  write a heap profile to mem.pprof and log memory statistics
+func main() {
+	vips.Startup(&vips.Config{})
+	defer vips.Shutdown()
+
+	if len(os.Args) < 2 {
+		log.Fatal("usage: ", os.Args[0], " <config file>")
+	}
+	content, err := os.ReadFile(os.Args[1])
+	if err != nil {
+		log.Fatal("config file unreadable ", err)
+	}
+	var config Config
+	err = json.Unmarshal(content, &config)
+	if err != nil {
+		log.Fatal("config file wrong ", err)
+	}
+	fmt.Println(config)
+
+	db, err := sqlx.Connect("sqlite3", config.DbPath)
+	if err != nil {
+		log.Fatal("DB connection failure ", err)
+	}
+	db.MustExec(schema)
+	// this handle is only needed for the schema; every other user opens its own
+	db.Close()
+
+	// Poll the backend once a second until its configuration can be fetched
+	// and decoded.
+	var backend InferenceServerConfig
+	for {
+		resp, err := http.Get(config.ClipServer + "/config")
+		if err != nil {
+			// resp is nil here, so it must not be decoded or closed
+			log.Println("backend failed (fetch) ", err)
+		} else {
+			backend, err = decodeMsgpackFrom[InferenceServerConfig](resp)
+			resp.Body.Close()
+			if err == nil {
+				break
+			}
+			log.Println("backend failed (parse) ", err)
+		}
+		time.Sleep(time.Second)
+	}
+
+	requestIngest := make(chan struct{}, 1)
+
+	var index *Index
+	// maybe this ought to be mutexed?
+	var lastError *error
+	// there's not a neat way to reusably broadcast to multiple channels, but I *can* abuse WaitGroups probably
+	// this might cause horrible concurrency issues, but you brought me to this point, Go designers
+	// NOTE(review): index and lastError are shared between this goroutine and the
+	// HTTP handlers without synchronisation, and /reload can observe the
+	// WaitGroup before the loop below has called Add for the requested run.
+	var wg sync.WaitGroup
+
+	go func() {
+		for {
+			wg.Add(1)
+			log.Println("ingest running")
+			err := ingestFiles(config, backend)
+			if err != nil {
+				log.Println("ingest failed ", err)
+				lastError = &err
+			} else {
+				newIndex, err := buildIndex(config, backend)
+				if err != nil {
+					log.Println("index build failed ", err)
+					lastError = &err
+				} else {
+					lastError = nil
+					index = &newIndex
+				}
+			}
+			wg.Done()
+			<-requestIngest
+		}
+	}()
+	// build an index from whatever is already in the database so queries can be
+	// served while the first ingest is still running
+	newIndex, err := buildIndex(config, backend)
+	if err != nil {
+		log.Fatal("index build failed ", err)
+	}
+	index = &newIndex
+
+	http.HandleFunc("/", func(w http.ResponseWriter, req *http.Request) {
+		w.Header().Add("Access-Control-Allow-Origin", "*")
+		w.Header().Add("Access-Control-Allow-Headers", "Content-Type")
+		if req.Method == "OPTIONS" {
+			w.WriteHeader(204)
+			return
+		}
+		err := handleRequest(config, backend, index, w, req)
+		if err != nil {
+			w.Header().Add("Content-Type", "application/json")
+			w.WriteHeader(500)
+			json.NewEncoder(w).Encode(map[string]string{
+				"error": err.Error(),
+			})
+		}
+	})
+	http.HandleFunc("/reload", func(w http.ResponseWriter, req *http.Request) {
+		if req.Method == "POST" {
+			log.Println("requesting index reload")
+			// non-blocking send: one pending request is enough
+			select {
+			case requestIngest <- struct{}{}:
+			default:
+			}
+			wg.Wait()
+			if lastError == nil {
+				w.Write([]byte("OK"))
+			} else {
+				w.WriteHeader(500)
+				w.Write([]byte((*lastError).Error()))
+			}
+		}
+	})
+	http.HandleFunc("/profile", func(w http.ResponseWriter, req *http.Request) {
+		// A failing profile must not take the whole server down, so errors are
+		// reported to the client instead of calling log.Fatal.
+		f, err := os.Create("mem.pprof")
+		if err != nil {
+			log.Println("could not create memory profile: ", err)
+			http.Error(w, "could not create memory profile: "+err.Error(), 500)
+			return
+		}
+		defer f.Close()
+		var m runtime.MemStats
+		runtime.ReadMemStats(&m)
+		log.Printf("Memory usage: Alloc=%v, TotalAlloc=%v, Sys=%v", m.Alloc, m.TotalAlloc, m.Sys)
+		log.Println(bimg.VipsMemory())
+		bimg.VipsDebugInfo()
+		runtime.GC() // Trigger garbage collection
+		if err := pprof.WriteHeapProfile(f); err != nil {
+			log.Println("could not write memory profile: ", err)
+			http.Error(w, "could not write memory profile: "+err.Error(), 500)
+			return
+		}
+	})
+	log.Println("starting server")
+	// ListenAndServe only returns on failure (for example the port is in use)
+	log.Fatal("server failed ", http.ListenAndServe(fmt.Sprintf(":%d", config.Port), nil))
+}
diff --git a/misc/bad-go-version/problematic_thing_2.go b/misc/bad-go-version/problematic_thing_2.go
new file mode 100644
index 0000000..3f79685
--- /dev/null
+++ b/misc/bad-go-version/problematic_thing_2.go
@@ -0,0 +1,265 @@
+package main
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "math"
+ "mime/multipart"
+ "net/http"
+ "net/textproto"
+ "regexp"
+ "strings"
+ "time"
+
+ "github.com/davidbyttow/govips/v2/vips"
+ "github.com/samber/lo"
+ "github.com/titanous/json5"
+)
+
+const CALLBACK_REGEX string = ">AF_initDataCallback\\(({key: 'ds:1'.*?)\\);"
+
+type SegmentCoords struct { // bounding box of one text segment, in pixels
+ x int // left edge
+ y int // top edge
+ w int // width
+ h int // height
+}
+
+type Segment struct { // one piece of recognised text together with its bounding box
+ coords SegmentCoords
+ text string
+}
+
+type ScanResult []Segment // every segment found in one image (or one chunk of it)
+
+// rationalizeCoordsFormat1 turns a box given as centre and size fractions of the image into a top-left pixel box.
+// TODO coordinates are negative sometimes and I think they shouldn't be
+func rationalizeCoordsFormat1(imageW float64, imageH float64, centerXFraction float64, centerYFraction float64, widthFraction float64, heightFraction float64) SegmentCoords {
+	toPixels := func(fraction float64, extent float64) int { return int(math.Round(fraction * extent)) }
+	return SegmentCoords{
+		x: toPixels(centerXFraction-widthFraction/2, imageW), y: toPixels(centerYFraction-heightFraction/2, imageH),
+		w: toPixels(widthFraction, imageW), h: toPixels(heightFraction, imageH),
+	}
+}
+
+// scanImageChunk uploads one PNG-encoded chunk to Google Lens and pairs each text
+// segment of the reply with its bounding box, scaled to imageWidth x imageHeight.
+// A reply lacking the expected text structure yields an empty result and no error.
+func scanImageChunk(image []byte, imageWidth int, imageHeight int) (ScanResult, error) {
+	var result ScanResult
+	timestamp := time.Now().UnixMicro()
+	var b bytes.Buffer
+	w := multipart.NewWriter(&b)
+	h := make(textproto.MIMEHeader)
+	h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="encoded_image"; filename="ocr%d.png"`, timestamp))
+	h.Set("Content-Type", "image/png")
+	fw, err := w.CreatePart(h)
+	if err != nil {
+		return result, err
+	}
+	if _, err := fw.Write(image); err != nil {
+		return result, err
+	}
+	// Close once, before sending; an additional deferred Close would write a second closing boundary.
+	if err := w.Close(); err != nil {
+		return result, err
+	}
+
+	req, err := http.NewRequest("POST", fmt.Sprintf("https://lens.google.com/v3/upload?stcs=%d", timestamp), &b)
+	if err != nil {
+		return result, err
+	}
+	req.Header.Add("User-Agent", "Mozilla/5.0 (Linux; Android 13; RMX3771) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.144 Mobile Safari/537.36")
+	req.AddCookie(&http.Cookie{Name: "SOCS", Value: "CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg"})
+	req.Header.Set("Content-Type", w.FormDataContentType())
+	client := http.Client{Timeout: 10 * time.Second} // same 10s limit as the Python original quoted at the end of this file
+	res, err := client.Do(req)
+	if err != nil {
+		return result, err
+	}
+	defer res.Body.Close()
+	body, err := io.ReadAll(res.Body)
+	if err != nil {
+		return result, err
+	}
+	re := regexp.MustCompile(CALLBACK_REGEX)
+	matches := re.FindStringSubmatch(string(body))
+	if len(matches) == 0 {
+		return result, fmt.Errorf("invalid API response")
+	}
+	var lensObject map[string]interface{}
+	err = json5.Unmarshal([]byte(matches[1]), &lensObject)
+	if err != nil {
+		return result, err
+	}
+
+	if _, ok := lensObject["errorHasStatus"]; ok {
+		return result, errors.New("lens failed")
+	}
+
+	// Checked assertion: the recover below is not installed yet, so a panic here would be fatal.
+	root, ok := lensObject["data"].([]interface{})
+	if !ok {
+		return result, errors.New("invalid API response")
+	}
+
+	var textSegments []string
+	var textRegions []SegmentCoords
+
+	// I don't know why Google did this.
+	// Text segments are in one place and their locations are in another, using a very strange coordinate system.
+	// At least I don't need whatever is contained in the base64 parts (which I assume are protobufs).
+	// TODO: on a few images, this seems to not work for some reason.
+	defer func() {
+		// https://github.com/dimdenGD/chrome-lens-ocr/blob/main/src/core.js#L316 has code for a fallback text segment read mode.
+		// In testing, this proved unnecessary (quirks of the HTTP request? I don't know), and this only happens on textless images.
+		// Swallowing the panic makes the function return its zero values: no segments, no error.
+		_ = recover()
+	}()
+
+	textSegmentsRaw := root[3].([]interface{})[4].([]interface{})[0].([]interface{})[0].([]interface{})
+	textRegionsRaw := root[2].([]interface{})[3].([]interface{})[0].([]interface{})
+	for _, x := range textRegionsRaw {
+		if strings.HasPrefix(x.([]interface{})[11].(string), "text:") {
+			rawCoords := x.([]interface{})[1].([]interface{})
+			coords := rationalizeCoordsFormat1(float64(imageWidth), float64(imageHeight), rawCoords[0].(float64), rawCoords[1].(float64), rawCoords[2].(float64), rawCoords[3].(float64))
+			textRegions = append(textRegions, coords)
+		}
+	}
+	for _, x := range textSegmentsRaw {
+		textSegments = append(textSegments, x.(string))
+	}
+	return lo.Map(lo.Zip2(textSegments, textRegions), func(x lo.Tuple2[string, SegmentCoords], _ int) Segment {
+		return Segment{text: x.A, coords: x.B}
+	}), nil
+}
+
+const MAX_DIM int = 1024
+
+// scanImage downscales image to at most MAX_DIM pixels wide, cuts it into strips of
+// at most MAX_DIM rows and OCRs each strip with scanImageChunk. Returned segment
+// coordinates are relative to the downscaled image.
+func scanImage(image *vips.ImageRef) (ScanResult, error) {
+	result := ScanResult{}
+	scale := 1.0
+	if image.Width() > MAX_DIM {
+		scale = float64(MAX_DIM) / float64(image.Width())
+	}
+	downscaled, err := image.Copy()
+	if err != nil {
+		return result, err
+	}
+	// Resize works in place, so it is applied to the copy and the caller's image stays untouched.
+	if err := downscaled.Resize(scale, vips.KernelLanczos3); err != nil {
+		return result, err
+	}
+	// Take the real post-resize size so every strip stays inside the image.
+	width, height := downscaled.Width(), downscaled.Height()
+	for y := 0; y < height; y += MAX_DIM {
+		chunkHeight := MAX_DIM
+		if y+chunkHeight > height {
+			chunkHeight = height - y
+		}
+		// ExtractArea crops in place, so each strip starts from a fresh copy of the downscaled image.
+		chunk, err := downscaled.Copy()
+		if err != nil {
+			return result, err
+		}
+		err = chunk.ExtractArea(0, y, width, chunkHeight)
+		if err != nil {
+			return result, err
+		}
+		buf, _, err := chunk.ExportPng(&vips.PngExportParams{})
+		if err != nil {
+			return result, err
+		}
+		res, err := scanImageChunk(buf, width, chunkHeight)
+		if err != nil {
+			return result, err
+		}
+		for _, segment := range res {
+			segment.coords.y += y // strip-relative y becomes image-relative
+			result = append(result, segment)
+		}
+	}
+
+	return result, nil
+}
+
+/*
+async def scan_image_chunk(sess, image):
+ # send data to inscrutable undocumented Google service
+ # https://github.com/AuroraWright/owocr/blob/master/owocr/ocr.py#L193
+ async with aiohttp.ClientSession() as sess:
+ data = aiohttp.FormData()
+ data.add_field(
+ "encoded_image",
+ encode_img(image),
+ filename="ocr" + str(timestamp) + ".png",
+ content_type="image/png"
+ )
+ async with sess.post(url, headers=headers, cookies=cookies, data=data, timeout=10) as res:
+ body = await res.text()
+
+ # I really worry about Google sometimes. This is not a sensible format.
+ match = CALLBACK_REGEX.search(body)
+ if match == None:
+ raise ValueError("Invalid callback")
+
+ lens_object = pyjson5.loads(match.group(1))
+ if "errorHasStatus" in lens_object:
+ raise RuntimeError("Lens failed")
+
+ text_segments = []
+ text_regions = []
+
+ root = lens_object["data"]
+
+ # I don't know why Google did this.
+ # Text segments are in one place and their locations are in another, using a very strange coordinate system.
+ # At least I don't need whatever is contained in the base64 parts (which I assume are protobufs).
+ # TODO: on a few images, this seems to not work for some reason.
+ try:
+ text_segments = root[3][4][0][0]
+ text_regions = [ rationalize_coords_format1(image.width, image.height, *x[1]) for x in root[2][3][0] if x[11].startswith("text:") ]
+ except (KeyError, IndexError):
+ # https://github.com/dimdenGD/chrome-lens-ocr/blob/main/src/core.js#L316 has code for a fallback text segment read mode.
+ # In testing, this proved unnecessary (quirks of the HTTP request? I don't know), and this only happens on textless images.
+ return [], []
+
+ return text_segments, text_regions
+
+MAX_SCAN_DIM = 1000 # not actually true but close enough
+def chunk_image(image: Image):
+ chunks = []
+ # Cut image down in X axis (I'm assuming images aren't too wide to scan in downscaled form because merging text horizontally would be annoying)
+ if image.width > MAX_SCAN_DIM:
+ image = image.resize((MAX_SCAN_DIM, round(image.height * (image.width / MAX_SCAN_DIM))), Image.LANCZOS)
+ for y in range(0, image.height, MAX_SCAN_DIM):
+ chunks.append(image.crop((0, y, image.width, min(y + MAX_SCAN_DIM, image.height))))
+ return chunks
+
+async def scan_chunks(sess: aiohttp.ClientSession, chunks: [Image]):
+ # If text happens to be split across the cut line it won't get read.
+ # This is because doing overlap read areas would be really annoying.
+ text = ""
+ regions = []
+ for chunk in chunks:
+ new_segments, new_regions = await scan_image_chunk(sess, chunk)
+ for segment in new_segments:
+ text += segment + "\n"
+ for i, (segment, region) in enumerate(zip(new_segments, new_regions)):
+ regions.append({ **region, "y": region["y"] + (MAX_SCAN_DIM * i), "text": segment })
+ return text, regions
+
+async def scan_image(sess: aiohttp.ClientSession, image: Image):
+ return await scan_chunks(sess, chunk_image(image))
+
+if __name__ == "__main__":
+ async def main():
+ async with aiohttp.ClientSession() as sess:
+ print(await scan_image(sess, Image.open("/data/public/memes-or-something/linear-algebra-chess.png")))
+ asyncio.run(main())
+*/
diff --git a/mse.py b/mse.py
index 36e1fba..782826b 100644
--- a/mse.py
+++ b/mse.py
@@ -12,8 +12,11 @@ import os
import aiohttp_cors
import json
import io
+import time
import sys
-from concurrent.futures import ProcessPoolExecutor
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+import threading
with open(sys.argv[1], "r") as config_file:
CONFIG = json.load(config_file)
@@ -21,26 +24,26 @@ with open(sys.argv[1], "r") as config_file:
app = web.Application(client_max_size=32*1024**2)
routes = web.RouteTableDef()
-async def clip_server(query, unpack_buffer=True):
- async with aiohttp.ClientSession() as sess:
- async with sess.post(CONFIG["clip_server"], data=umsgpack.dumps(query)) as res:
- response = umsgpack.loads(await res.read())
- if res.status == 200:
- if unpack_buffer:
- response = [ numpy.frombuffer(x, dtype="float16") for x in response ]
- return response
- else:
- raise Exception(response if res.headers.get("content-type") == "application/msgpack" else (await res.text()))
+async def clip_server(sess: aiohttp.ClientSession, query, unpack_buffer=True):
+ async with sess.post(CONFIG["clip_server"], data=umsgpack.dumps(query)) as res:
+ response = umsgpack.loads(await res.read())
+ if res.status == 200:
+ if unpack_buffer:
+ response = [ numpy.frombuffer(x, dtype="float16") for x in response ]
+ return response
+ else:
+ raise Exception(response if res.headers.get("content-type") == "application/msgpack" else (await res.text()))
@routes.post("/")
async def run_query(request):
+ sess = app["session"]
data = await request.json()
embeddings = []
if images := data.get("images", []):
target_image_size = app["index"].inference_server_config["image_size"]
- embeddings.extend(await clip_server({ "images": [ load_image(io.BytesIO(base64.b64decode(x)), target_image_size)[0] for x, w in images ] }))
+ embeddings.extend(await clip_server(sess, { "images": [ load_image(io.BytesIO(base64.b64decode(x)), target_image_size)[0] for x, w in images ] }))
if text := data.get("text", []):
- embeddings.extend(await clip_server({ "text": [ x for x, w in text ] }))
+ embeddings.extend(await clip_server(sess, { "text": [ x for x, w in text ] }))
weights = [ w for x, w in images ] + [ w for x, w in text ]
weighted_embeddings = [ e * w for e, w in zip(embeddings, weights) ]
weighted_embeddings.extend([ numpy.array(x) for x in data.get("embeddings", []) ])
@@ -65,11 +68,12 @@ def load_image(path, image_size):
return buf.getvalue(), path
class Index:
- def __init__(self, inference_server_config):
+ def __init__(self, inference_server_config, http_session):
self.faiss_index = faiss.IndexFlatIP(inference_server_config["embedding_size"])
self.associated_filenames = []
self.inference_server_config = inference_server_config
self.lock = asyncio.Lock()
+ self.session = http_session
def search(self, query, top_k):
distances, indices = self.faiss_index.search(numpy.array([query]), top_k)
@@ -80,18 +84,77 @@ class Index:
except IndexError: pass
return [ { "score": float(distance), "file": self.associated_filenames[index] } for index, distance in zip(indices, distances) ]
+ async def run_ocr(self):
+ if not CONFIG.get("enable_ocr"): return
+
+ import ocr
+
+ print("Running OCR")
+
+ conn = await aiosqlite.connect(CONFIG["db_path"])
+ unocred = await conn.execute_fetchall("SELECT files.filename FROM files LEFT JOIN ocr ON files.filename = ocr.filename WHERE ocr.scan_time IS NULL OR ocr.scan_time < files.modtime")
+
+ ocr_sem = asyncio.Semaphore(20) # Google has more concurrency than our internal CLIP backend. I am sure they will be fine.
+ load_sem = threading.Semaphore(100) # provide backpressure in loading to avoid using 50GB of RAM (this happened)
+
+ async def run_image(filename, chunks):
+ try:
+ text, regions = await ocr.scan_chunks(self.session, chunks)
+ await conn.execute("INSERT OR REPLACE INTO ocr VALUES (?, ?, ?, ?)", (filename, time.time(), text, json.dumps(regions)))
+ await conn.commit()
+ sys.stdout.write(".")
+ sys.stdout.flush()
+ except:
+ print("OCR failed on", filename)
+ finally:
+ ocr_sem.release()
+
+ def load_and_chunk_image(filename):
+ load_sem.acquire()
+ im = Image.open(Path(CONFIG["files"]) / filename)
+ return filename, ocr.chunk_image(im)
+
+ async with asyncio.TaskGroup() as tg:
+ with ThreadPoolExecutor(max_workers=CONFIG.get("n_workers", 1)) as executor:
+ for task in asyncio.as_completed([ asyncio.get_running_loop().run_in_executor(executor, load_and_chunk_image, file[0]) for file in unocred ]):
+ filename, chunks = await task
+ await ocr_sem.acquire()
+ tg.create_task(run_image(filename, chunks))
+ load_sem.release()
+
async def reload(self):
async with self.lock:
- with ProcessPoolExecutor(max_workers=12) as executor:
+ with ThreadPoolExecutor(max_workers=CONFIG.get("n_workers", 1)) as executor:
print("Indexing")
conn = await aiosqlite.connect(CONFIG["db_path"])
conn.row_factory = aiosqlite.Row
await conn.executescript("""
- CREATE TABLE IF NOT EXISTS files (
- filename TEXT PRIMARY KEY,
- modtime REAL NOT NULL,
- embedding_vector BLOB NOT NULL
- );
+CREATE TABLE IF NOT EXISTS files (
+ filename TEXT PRIMARY KEY,
+ modtime REAL NOT NULL,
+ embedding_vector BLOB NOT NULL
+);
+CREATE TABLE IF NOT EXISTS ocr (
+ filename TEXT PRIMARY KEY REFERENCES files(filename),
+ scan_time INTEGER NOT NULL,
+ text TEXT NOT NULL,
+ raw_segments TEXT
+);
+
+CREATE VIRTUAL TABLE IF NOT EXISTS ocr_fts USING fts5 (
+ filename,
+ text,
+ tokenize='unicode61 remove_diacritics 2',
+ content='ocr'
+);
+
+CREATE TRIGGER IF NOT EXISTS ocr_fts_ins AFTER INSERT ON ocr BEGIN
+ INSERT INTO ocr_fts (rowid, filename, text) VALUES (new.rowid, new.filename, new.text);
+END;
+
+CREATE TRIGGER IF NOT EXISTS ocr_fts_del AFTER DELETE ON ocr BEGIN
+ INSERT INTO ocr_fts (ocr_fts, rowid, filename, text) VALUES ('delete', old.rowid, old.filename, old.text);
+END;
""")
try:
async with asyncio.TaskGroup() as tg:
@@ -102,7 +165,7 @@ class Index:
async def do_batch(batch):
try:
query = { "images": [ arg[2] for arg in batch ] }
- embeddings = await clip_server(query, False)
+ embeddings = await clip_server(self.session, query, False)
await conn.executemany("INSERT OR REPLACE INTO files VALUES (?, ?, ?)", [
(filename, modtime, embedding) for (filename, modtime, _), embedding in zip(batch, embeddings)
])
@@ -188,6 +251,8 @@ class Index:
finally:
await conn.close()
+ await self.run_ocr()
+
app.router.add_routes(routes)
cors = aiohttp_cors.setup(app, defaults={
@@ -201,8 +266,8 @@ for route in list(app.router.routes()):
cors.add(route)
async def main():
+ sess = aiohttp.ClientSession()
while True:
- async with aiohttp.ClientSession() as sess:
try:
async with await sess.get(CONFIG["clip_server"] + "config") as res:
inference_server_config = umsgpack.unpackb(await res.read())
@@ -211,8 +276,9 @@ async def main():
except:
traceback.print_exc()
await asyncio.sleep(1)
- index = Index(inference_server_config)
+ index = Index(inference_server_config, sess)
app["index"] = index
+ app["session"] = sess
await index.reload()
print("Ready")
if CONFIG.get("no_run_server", False): return
diff --git a/mse_config.json b/mse_config.json
index 254da66..07c3210 100644
--- a/mse_config.json
+++ b/mse_config.json
@@ -1,6 +1,9 @@
{
- "clip_server": "http://localhost:1708/",
- "db_path": "/srv/mse/data.sqlite3",
+ "clip_server": "http://100.64.0.10:1708",
+ "db_path": "data.sqlite3",
"port": 1707,
- "files": "/data/public/memes-or-something/"
+ "files": "/data/public/memes-or-something/",
+ "enable_ocr": false,
+ "thumbs_path": "./thumbtemp",
+ "enable_thumbs": false
}
\ No newline at end of file
diff --git a/ocr.py b/ocr.py
new file mode 100644
index 0000000..7c4f8c9
--- /dev/null
+++ b/ocr.py
@@ -0,0 +1,101 @@
+import pyjson5
+import re
+import asyncio
+import aiohttp
+from PIL import Image
+import time
+import io
+
+CALLBACK_REGEX = re.compile(r">AF_initDataCallback\(({key: 'ds:1'.*?)\);")
+
+def encode_img(img):
+ image_bytes = io.BytesIO()
+ img.save(image_bytes, format="PNG", compress_level=6)
+ return image_bytes.getvalue()
+
+def rationalize_coords_format1(image_w, image_h, center_x_fraction, center_y_fraction, width_fraction, height_fraction, mysterious):
+ return {
+ "x": round((center_x_fraction - width_fraction / 2) * image_w),
+ "y": round((center_y_fraction - height_fraction / 2) * image_h),
+ "w": round(width_fraction * image_w),
+ "h": round(height_fraction * image_h)
+ }
+
+async def scan_image_chunk(sess, image):
+ timestamp = int(time.time() * 1000)
+ url = f"https://lens.google.com/v3/upload?stcs={timestamp}"
+ headers = {"User-Agent": "Mozilla/5.0 (Linux; Android 13; RMX3771) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.144 Mobile Safari/537.36"}
+ cookies = {"SOCS": "CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg"}
+
+ # send data to inscrutable undocumented Google service
+ # https://github.com/AuroraWright/owocr/blob/master/owocr/ocr.py#L193
+ async with aiohttp.ClientSession() as sess:
+ data = aiohttp.FormData()
+ data.add_field(
+ "encoded_image",
+ encode_img(image),
+ filename="ocr" + str(timestamp) + ".png",
+ content_type="image/png"
+ )
+ async with sess.post(url, headers=headers, cookies=cookies, data=data, timeout=10) as res:
+ body = await res.text()
+
+ # I really worry about Google sometimes. This is not a sensible format.
+ match = CALLBACK_REGEX.search(body)
+ if match == None:
+ raise ValueError("Invalid callback")
+
+ lens_object = pyjson5.loads(match.group(1))
+ if "errorHasStatus" in lens_object:
+ raise RuntimeError("Lens failed")
+
+ text_segments = []
+ text_regions = []
+
+ root = lens_object["data"]
+
+ # I don't know why Google did this.
+ # Text segments are in one place and their locations are in another, using a very strange coordinate system.
+ # At least I don't need whatever is contained in the base64 parts (which I assume are protobufs).
+ # TODO: on a few images, this seems to not work for some reason.
+ try:
+ text_segments = root[3][4][0][0]
+ text_regions = [ rationalize_coords_format1(image.width, image.height, *x[1]) for x in root[2][3][0] if x[11].startswith("text:") ]
+ except (KeyError, IndexError):
+ # https://github.com/dimdenGD/chrome-lens-ocr/blob/main/src/core.js#L316 has code for a fallback text segment read mode.
+ # In testing, this proved unnecessary (quirks of the HTTP request? I don't know), and this only happens on textless images.
+ return [], []
+
+ return text_segments, text_regions
+
+MAX_SCAN_DIM = 1000 # not actually true but close enough
+def chunk_image(image: Image):
+ chunks = []
+ # Cut image down in X axis (I'm assuming images aren't too wide to scan in downscaled form because merging text horizontally would be annoying)
+ if image.width > MAX_SCAN_DIM:
+ image = image.resize((MAX_SCAN_DIM, round(image.height * (image.width / MAX_SCAN_DIM))), Image.LANCZOS)
+ for y in range(0, image.height, MAX_SCAN_DIM):
+ chunks.append(image.crop((0, y, image.width, min(y + MAX_SCAN_DIM, image.height))))
+ return chunks
+
+async def scan_chunks(sess: aiohttp.ClientSession, chunks: [Image]):
+ # If text happens to be split across the cut line it won't get read.
+ # This is because doing overlap read areas would be really annoying.
+ text = ""
+ regions = []
+ for chunk in chunks:
+ new_segments, new_regions = await scan_image_chunk(sess, chunk)
+ for segment in new_segments:
+ text += segment + "\n"
+ for i, (segment, region) in enumerate(zip(new_segments, new_regions)):
+ regions.append({ **region, "y": region["y"] + (MAX_SCAN_DIM * i), "text": segment })
+ return text, regions
+
+async def scan_image(sess: aiohttp.ClientSession, image: Image):  # split `image` with chunk_image, then OCR the pieces with scan_chunks
+ return await scan_chunks(sess, chunk_image(image))
+
+if __name__ == "__main__":  # manual check: OCR one hard-coded image and print the (text, regions) result
+ async def main():
+ async with aiohttp.ClientSession() as sess:
+ print(await scan_image(sess, Image.open("/data/public/memes-or-something/linear-algebra-chess.png")))
+ asyncio.run(main())
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..2c37f64
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,892 @@
+use std::{collections::HashMap, io::Cursor};
+use std::path::Path;
+use std::sync::Arc;
+
+use anyhow::{Result, Context};
+use axum::body::Body;
+use axum::response::Response;
+use axum::{
+ extract::Json,
+ response::IntoResponse,
+ routing::{get, post},
+ Router,
+ http::StatusCode
+};
+use image::{imageops::FilterType, io::Reader as ImageReader, DynamicImage, ImageFormat};
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use sqlx::{sqlite::SqliteConnectOptions, SqlitePool};
+use tokio::sync::{broadcast, mpsc};
+use tokio::task::JoinHandle;
+use walkdir::WalkDir;
+use base64::prelude::*;
+use faiss::Index;
+use futures_util::stream::{StreamExt, TryStreamExt};
+use tokio_stream::wrappers::ReceiverStream;
+use tower_http::cors::CorsLayer;
+
+mod ocr;
+
+use crate::ocr::scan_image;
+
+fn function_which_returns_50() -> usize { 50 } // serde default provider for Config::ocr_concurrency
+
+#[derive(Debug, Deserialize, Clone)]
+struct Config {
+ clip_server: String, // base URL of the CLIP inference server; request paths are appended to it verbatim
+ db_path: String, // SQLite database file, created if it does not exist
+ port: u16,
+ files: String, // root directory of the image files; stored filenames are relative to it
+ #[serde(default)]
+ enable_ocr: bool, // run the OCR stage during ingest
+ #[serde(default)]
+ thumbs_path: String, // directory generated thumbnails are written to
+ #[serde(default)]
+ enable_thumbs: bool, // run the thumbnail stage during ingest
+ #[serde(default="function_which_returns_50")]
+ ocr_concurrency: usize, // maximum number of OCR scans in flight at once
+ #[serde(default)]
+ no_run_server: bool
+}
+
+#[derive(Debug)]
+struct IIndex {
+ vectors: faiss::index::IndexImpl,
+ filenames: Vec,
+ format_codes: Vec,
+ format_names: Vec,
+}
+
+const SCHEMA: &str = r#"
+CREATE TABLE IF NOT EXISTS files (
+    filename TEXT NOT NULL PRIMARY KEY,
+    embedding_time INTEGER,
+    ocr_time INTEGER,
+    thumbnail_time INTEGER,
+    embedding BLOB,
+    ocr TEXT,
+    raw_ocr_segments BLOB,
+    thumbnails BLOB
+);
+-- Full-text index over files.ocr; files is the external content table and the triggers below keep the index in sync.
+CREATE VIRTUAL TABLE IF NOT EXISTS ocr_fts USING fts5 (
+    filename,
+    ocr,
+    tokenize='unicode61 remove_diacritics 2',
+    content='files'
+);
+
+CREATE TRIGGER IF NOT EXISTS ocr_fts_ins AFTER INSERT ON files BEGIN
+    INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
+END;
+
+CREATE TRIGGER IF NOT EXISTS ocr_fts_del AFTER DELETE ON files BEGIN
+    INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
+END;
+-- Needs its own name: sharing ocr_fts_del would make IF NOT EXISTS skip creating the UPDATE trigger.
+CREATE TRIGGER IF NOT EXISTS ocr_fts_upd AFTER UPDATE ON files BEGIN
+    INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
+    INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
+END;
+"#;
+
+#[derive(Debug, sqlx::FromRow, Clone, Default)]
+struct FileRecord {
+ filename: String,
+ embedding_time: Option,
+ ocr_time: Option,
+ thumbnail_time: Option,
+ embedding: Option>,
+ // this totally "will" be used later
+ ocr: Option,
+ raw_ocr_segments: Option>,
+ thumbnails: Option>,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+struct InferenceServerConfig {
+ batch: usize, // number of images grouped into one embedding request
+ image_size: (u32, u32), // (width, height) bounds images are resized to before embedding
+ embedding_size: usize,
+}
+
+async fn query_clip_server(
+ client: &Client,
+ config: &Config,
+ path: &str,
+ data: I,
+) -> Result where I: Serialize, O: serde::de::DeserializeOwned,
+{
+ let response = client
+ .post(&format!("{}{}", config.clip_server, path))
+ .header("Content-Type", "application/msgpack")
+ .body(rmp_serde::to_vec_named(&data)?)
+ .send()
+ .await?;
+ let result: O = rmp_serde::from_slice(&response.bytes().await?)?;
+ Ok(result)
+}
+
+#[derive(Debug)]
+struct LoadedImage {
+ image: Arc,
+ filename: String,
+ original_size: usize,
+}
+
+#[derive(Debug)]
+struct EmbeddingInput {
+ image: Vec,
+ filename: String,
+}
+
+#[derive(Debug, Serialize)]
+#[serde(untagged)]
+enum EmbeddingRequest {
+ Images { images: Vec },
+ Text { text: Vec }
+}
+
+fn timestamp() -> i64 { // current UTC time in microseconds, as stored in the *_time columns
+ chrono::Utc::now().timestamp_micros()
+}
+
+#[derive(Debug, Clone)]
+struct ImageFormatConfig { // one thumbnail variant to generate
+ target_width: u32, // width bound passed to resize; the height is left unbounded
+ target_filesize: u32, // non-zero switches the encoder to a search over quality instead of the fixed `quality`
+ quality: u8, // encoder quality used when target_filesize is 0
+ format: ImageFormat, // selects the encoder (AVIF and JPEG are handled; WebP only in the fixed-quality path)
+ extension: String, // file extension of the thumbnail, without the dot
+}
+
+fn generate_filename_hash(filename: &str) -> String { // FNV hash of the filename, base64url-encoded without padding
+ use std::hash::{Hash, Hasher};
+ let mut hasher = fnv::FnvHasher::default();
+ filename.hash(&mut hasher);
+ BASE64_URL_SAFE_NO_PAD.encode(hasher.finish().to_le_bytes()) // the 64-bit hash is encoded from its little-endian bytes
+}
+
+fn generate_thumbnail_filename( // builds the thumbnail file name for one source file and one format
+ filename: &str,
+ format_name: &str,
+ format_config: &ImageFormatConfig,
+) -> String {
+ format!(
+ "{}{}.{}", // <filename hash><format name>.<extension>
+ generate_filename_hash(filename),
+ format_name,
+ format_config.extension
+ )
+}
+
+async fn initialize_database(config: &Config) -> Result {
+ let connection_options = SqliteConnectOptions::new()
+ .filename(&config.db_path)
+ .create_if_missing(true);
+ let pool = SqlitePool::connect_with(connection_options).await?;
+ sqlx::query(SCHEMA).execute(&pool).await?;
+ Ok(pool)
+}
+
+fn image_formats(_config: &Config) -> HashMap {
+ let mut formats = HashMap::new();
+ formats.insert(
+ "jpegl".to_string(),
+ ImageFormatConfig {
+ target_width: 800,
+ target_filesize: 0,
+ quality: 70,
+ format: ImageFormat::Jpeg,
+ extension: "jpg".to_string(),
+ },
+ );
+ formats.insert(
+ "jpegh".to_string(),
+ ImageFormatConfig {
+ target_width: 1600,
+ target_filesize: 0,
+ quality: 80,
+ format: ImageFormat::Jpeg,
+ extension: "jpg".to_string(),
+ },
+ );
+ formats.insert(
+ "jpeg256kb".to_string(),
+ ImageFormatConfig {
+ target_width: 500,
+ target_filesize: 256000,
+ quality: 0,
+ format: ImageFormat::Jpeg,
+ extension: "jpg".to_string(),
+ },
+ );
+ formats.insert(
+ "avifh".to_string(),
+ ImageFormatConfig {
+ target_width: 1600,
+ target_filesize: 0,
+ quality: 80,
+ format: ImageFormat::Avif,
+ extension: "avif".to_string(),
+ },
+ );
+ formats.insert(
+ "avifl".to_string(),
+ ImageFormatConfig {
+ target_width: 800,
+ target_filesize: 0,
+ quality: 30,
+ format: ImageFormat::Avif,
+ extension: "avif".to_string(),
+ },
+ );
+ formats
+}
+
+async fn resize_for_embed(backend_config: Arc, image: Arc) -> Result> {
+ let resized = tokio::task::spawn_blocking(move || {
+ let new = image.resize(
+ backend_config.image_size.0,
+ backend_config.image_size.1,
+ FilterType::Lanczos3
+ );
+ let mut buf = Vec::new();
+ let mut csr = Cursor::new(&mut buf);
+ new.write_to(&mut csr, ImageFormat::Png)?;
+ Ok::, anyhow::Error>(buf)
+ }).await??;
+ Ok(resized)
+}
+
+async fn ingest_files(config: Arc, backend: Arc) -> Result<()> {
+ let pool = initialize_database(&config).await?;
+ let client = Client::new();
+
+ let formats = image_formats(&config);
+
+ let (to_process_tx, to_process_rx) = mpsc::channel::(100);
+ let (to_embed_tx, to_embed_rx) = mpsc::channel(backend.batch as usize);
+ let (to_thumbnail_tx, to_thumbnail_rx) = mpsc::channel(30);
+ let (to_ocr_tx, to_ocr_rx) = mpsc::channel(30);
+
+ let cpus = num_cpus::get();
+
+ // Image loading and preliminary resizing
+ let image_loading: JoinHandle> = tokio::spawn({
+ let config = config.clone();
+ let backend = backend.clone();
+ let stream = ReceiverStream::new(to_process_rx).map(Ok);
+ stream.try_for_each_concurrent(Some(cpus), move |record| {
+ let config = config.clone();
+ let backend = backend.clone();
+ let to_embed_tx = to_embed_tx.clone();
+ let to_thumbnail_tx = to_thumbnail_tx.clone();
+ let to_ocr_tx = to_ocr_tx.clone();
+ async move {
+ let path = Path::new(&config.files).join(&record.filename);
+ let image: Result> = tokio::task::block_in_place(|| Ok(Arc::new(ImageReader::open(&path)?.with_guessed_format()?.decode()?)));
+ let image = match image {
+ Ok(image) => image,
+ Err(e) => {
+ log::error!("Could not read {}: {}", record.filename, e);
+ return Ok(())
+ }
+ };
+ if record.embedding.is_none() {
+ let resized = resize_for_embed(backend.clone(), image.clone()).await?;
+
+ to_embed_tx.send(EmbeddingInput { image: resized, filename: record.filename.clone() }).await?
+ }
+ if record.thumbnails.is_none() && config.enable_thumbs {
+ to_thumbnail_tx
+ .send(LoadedImage {
+ image: image.clone(),
+ filename: record.filename.clone(),
+ original_size: std::fs::metadata(&path)?.len() as usize,
+ })
+ .await?;
+ }
+ if record.raw_ocr_segments.is_none() && config.enable_ocr {
+ to_ocr_tx
+ .send(LoadedImage {
+ image,
+ filename: record.filename.clone(),
+ original_size: 0,
+ })
+ .await?;
+ }
+ Ok(())
+ }
+ })
+ });
+
+ // Thumbnail generation
+ let thumbnail_generation: Option>> = if config.enable_thumbs {
+ let config = config.clone();
+ let pool = pool.clone();
+ let stream = ReceiverStream::new(to_thumbnail_rx).map(Ok);
+ let formats = Arc::new(formats);
+ Some(tokio::spawn({
+ stream.try_for_each_concurrent(Some(cpus), move |image| {
+ use image::codecs::*;
+
+ let formats = formats.clone();
+ let config = config.clone();
+ let pool = pool.clone();
+ async move {
+ let filename = image.filename.clone();
+ log::debug!("thumbnailing {}", filename);
+ let generated_formats = tokio::task::spawn_blocking(move || {
+ let mut generated_formats = Vec::new();
+ let rgb = DynamicImage::from(image.image.to_rgb8());
+ for (format_name, format_config) in &*formats {
+ let resized = if format_config.target_filesize != 0 {
+ let mut lb = 1;
+ let mut ub = 100;
+ loop {
+ let quality = (lb + ub) / 2;
+ let thumbnail = rgb.resize(
+ format_config.target_width,
+ u32::MAX,
+ FilterType::Lanczos3,
+ );
+ let mut buf: Vec = Vec::new();
+ let mut csr = Cursor::new(&mut buf);
+ // this is ugly but I don't actually know how to fix it (cannot factor it out due to issues with dyn Trait)
+ match format_config.format {
+ ImageFormat::Avif => thumbnail.write_with_encoder(avif::AvifEncoder::new_with_speed_quality(&mut csr, 4, quality)),
+ ImageFormat::Jpeg => thumbnail.write_with_encoder(jpeg::JpegEncoder::new_with_quality(&mut csr, quality)),
+ _ => unimplemented!()
+ }?;
+ if buf.len() > image.original_size {
+ ub = quality;
+ } else {
+ lb = quality + 1;
+ }
+ if lb >= ub {
+ break buf;
+ }
+ }
+ } else {
+ let thumbnail = rgb.resize(
+ format_config.target_width,
+ u32::MAX,
+ FilterType::Lanczos3,
+ );
+ let mut buf: Vec = Vec::new();
+ let mut csr = Cursor::new(&mut buf);
+ match format_config.format {
+ ImageFormat::Avif => thumbnail.write_with_encoder(avif::AvifEncoder::new_with_speed_quality(&mut csr, 4, format_config.quality)),
+ ImageFormat::Jpeg => thumbnail.write_with_encoder(jpeg::JpegEncoder::new_with_quality(&mut csr, format_config.quality)),
+ ImageFormat::WebP => thumbnail.write_with_encoder(webp::WebPEncoder::new_lossless(&mut csr)),
+ _ => unimplemented!()
+ }?;
+ buf
+ };
+ if resized.len() < image.original_size {
+ generated_formats.push(format_name.clone());
+ let thumbnail_path = Path::new(&config.thumbs_path).join(
+ generate_thumbnail_filename(
+ &image.filename,
+ format_name,
+ format_config,
+ ),
+ );
+ std::fs::write(thumbnail_path, resized)?;
+ }
+ }
+ Ok::, anyhow::Error>(generated_formats)
+ }).await??;
+ let formats_data = rmp_serde::to_vec(&generated_formats)?;
+ let ts = timestamp();
+ sqlx::query!(
+ "UPDATE files SET thumbnails = ?, thumbnail_time = ? WHERE filename = ?",
+ formats_data,
+ ts,
+ filename
+ )
+ .execute(&pool)
+ .await?;
+ Ok(())
+ }
+ })
+ }))
+ } else {
+ None
+ };
+
+ // OCR
+ let ocr: Option>> = if config.enable_ocr {
+ let client = client.clone();
+ let pool = pool.clone();
+ let stream = ReceiverStream::new(to_ocr_rx).map(Ok);
+ Some(tokio::spawn({
+ stream.try_for_each_concurrent(Some(config.ocr_concurrency), move |image| {
+ let client = client.clone();
+ let pool = pool.clone();
+ async move {
+ log::debug!("OCRing {}", image.filename);
+ let scan = match scan_image(&client, &image.image).await {
+ Ok(scan) => scan,
+ Err(e) => {
+ log::error!("OCR failure {}: {}", image.filename, e);
+ return Ok(())
+ }
+ };
+ let ocr_text = scan
+ .iter()
+ .map(|segment| segment.text.clone())
+ .collect::>()
+ .join("\n");
+ let ocr_data = rmp_serde::to_vec(&scan)?;
+ let ts = timestamp();
+ sqlx::query!(
+ "UPDATE files SET ocr = ?, raw_ocr_segments = ?, ocr_time = ? WHERE filename = ?",
+ ocr_text,
+ ocr_data,
+ ts,
+ image.filename
+ )
+ .execute(&pool)
+ .await?;
+ Ok(())
+ }
+ })
+ }))
+ } else {
+ None
+ };
+
+ let embedding_generation: JoinHandle> = tokio::spawn({
+ let stream = ReceiverStream::new(to_embed_rx).chunks(backend.batch);
+ let client = client.clone();
+ let config = config.clone();
+ let pool = pool.clone();
+ // keep multiple embedding requests in flight
+ stream.map(Ok).try_for_each_concurrent(Some(3), move |batch| {
+ let client = client.clone();
+ let config = config.clone();
+ let pool = pool.clone();
+ async move {
+ let result: Vec = query_clip_server(
+ &client,
+ &config,
+ "",
+ EmbeddingRequest::Images {
+ images: batch.iter().map(|input| serde_bytes::ByteBuf::from(input.image.clone())).collect(),
+ },
+ ).await.context("querying CLIP server")?;
+
+ let mut tx = pool.begin().await?;
+ let ts = timestamp();
+ for (i, vector) in result.into_iter().enumerate() {
+ let vector = vector.into_vec();
+ log::debug!("embedded {}", batch[i].filename);
+ sqlx::query!(
+ "UPDATE files SET embedding_time = ?, embedding = ? WHERE filename = ?",
+ ts,
+ vector,
+ batch[i].filename
+ )
+ .execute(&mut *tx)
+ .await?;
+ }
+ tx.commit().await?;
+ anyhow::Result::Ok(())
+ }
+ })
+ });
+
+ let mut filenames = HashMap::new();
+
+ // blocking OS calls
+ tokio::task::block_in_place(|| -> anyhow::Result<()> {
+ for entry in WalkDir::new(config.files.as_str()) {
+ let entry = entry?;
+ let path = entry.path();
+ if path.is_file() {
+ let filename = path.strip_prefix(&config.files)?.to_str().unwrap().to_string();
+ let modtime = entry.metadata()?.modified()?.duration_since(std::time::UNIX_EPOCH)?;
+ let modtime = modtime.as_micros() as i64;
+ filenames.insert(filename.clone(), (path.to_path_buf(), modtime));
+ }
+ }
+ Ok(())
+ })?;
+
+ log::debug!("finished reading filenames");
+
+ for (filename, (_path, modtime)) in filenames.iter() {
+ let modtime = *modtime;
+ let record = sqlx::query_as!(FileRecord, "SELECT * FROM files WHERE filename = ?", filename)
+ .fetch_optional(&pool)
+ .await?;
+
+ let new_record = match record {
+ None => Some(FileRecord {
+ filename: filename.clone(),
+ ..Default::default()
+ }),
+ Some(r) if modtime > r.embedding_time.unwrap_or(i64::MIN) || (modtime > r.ocr_time.unwrap_or(i64::MIN) && config.enable_ocr) || (modtime > r.thumbnail_time.unwrap_or(i64::MIN) && config.enable_thumbs) => {
+ Some(r)
+ },
+ _ => None
+ };
+ if let Some(mut record) = new_record {
+ log::debug!("processing {}", record.filename);
+ sqlx::query!("INSERT OR IGNORE INTO files (filename) VALUES (?)", filename)
+ .execute(&pool)
+ .await?;
+ if modtime > record.embedding_time.unwrap_or(i64::MIN) {
+ record.embedding = None;
+ }
+ if modtime > record.ocr_time.unwrap_or(i64::MIN) {
+ record.raw_ocr_segments = None;
+ }
+ if modtime > record.thumbnail_time.unwrap_or(i64::MIN) {
+ record.thumbnails = None;
+ }
+ // we need to exit here to actually capture the error
+ if !to_process_tx.send(record).await.is_ok() {
+ break
+ }
+ }
+ }
+
+ drop(to_process_tx);
+
+ embedding_generation.await?.context("generating embeddings")?;
+
+ if let Some(thumbnail_generation) = thumbnail_generation {
+ thumbnail_generation.await?.context("generating thumbnails")?;
+ }
+
+ if let Some(ocr) = ocr {
+ ocr.await?.context("OCRing")?;
+ }
+
+ image_loading.await?.context("loading images")?;
+
+ let stored: Vec = sqlx::query_scalar("SELECT filename FROM files").fetch_all(&pool).await?;
+ let mut tx = pool.begin().await?;
+ for filename in stored {
+ if !filenames.contains_key(&filename) {
+ sqlx::query!("DELETE FROM files WHERE filename = ?", filename)
+ .execute(&mut *tx)
+ .await?;
+ }
+ }
+ tx.commit().await?;
+
+ log::info!("ingest done");
+
+ Result::Ok(())
+}
+
+const INDEX_ADD_BATCH: usize = 512; // number of vectors build_index stages before each `index.vectors.add` call
+
+/// Builds the search index from every `files` row that has a stored embedding.
+///
+/// Embeddings are little-endian fp16 bytes; they are widened to `f32` and added to the
+/// FAISS index in batches of `INDEX_ADD_BATCH` vectors. A row whose embedding is not
+/// exactly `2 * embedding_size` bytes is skipped (with a warning) so that vector ids
+/// stay aligned with `filenames` and `format_codes`.
+async fn build_index(config: Arc, backend: Arc) -> Result {
+    let pool = initialize_database(&config).await?;
+    let dim = backend.embedding_size as usize;
+    // Flush threshold in floats; `Vec::capacity` may exceed what was requested, so it is
+    // not a reliable "batch full" signal.
+    let batch_len = INDEX_ADD_BATCH * dim;
+
+    let vectors = faiss::index_factory(backend.embedding_size as u32, "SQfp16", faiss::MetricType::InnerProduct)?;
+    let count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM files")
+        .fetch_one(&pool)
+        .await?;
+
+    let mut index = IIndex {
+        vectors,
+        filenames: Vec::with_capacity(count as usize),
+        format_codes: Vec::with_capacity(count as usize),
+        format_names: Vec::with_capacity(5),
+    };
+    let mut buffer = Vec::with_capacity(batch_len);
+
+    let mut rows = sqlx::query_as::<_, FileRecord>("SELECT * FROM files").fetch(&pool);
+    while let Some(record) = rows.try_next().await? {
+        let Some(emb) = record.embedding else { continue };
+        if emb.len() != dim * 2 {
+            log::warn!("skipping {}: embedding is {} bytes, expected {}", record.filename, emb.len(), dim * 2);
+            continue;
+        }
+        index.filenames.push(record.filename);
+        buffer.extend(decode_fp16_buffer(&emb));
+        if buffer.len() >= batch_len {
+            index.vectors.add(&buffer)?;
+            buffer.clear();
+        }
+
+        let mut formats: Vec = Vec::new();
+        if let Some(t) = record.thumbnails {
+            formats = rmp_serde::from_slice(&t)?;
+        }
+
+        // Each distinct format name owns one bit; a file's code is the OR of its formats' bits.
+        let mut format_code = 0;
+        for format_string in &formats {
+            let bit = match index.format_names.iter().position(|name| name == format_string) {
+                Some(i) => i,
+                None => {
+                    index.format_names.push(format_string.clone());
+                    index.format_names.len() - 1
+                }
+            };
+            format_code |= 1 << bit;
+        }
+        index.format_codes.push(format_code);
+    }
+    if !buffer.is_empty() {
+        index.vectors.add(&buffer)?;
+    }
+
+    Ok(index)
+}
+
+fn decode_fp16_buffer(buf: &[u8]) -> Vec {
+    // Little-endian fp16 byte pairs -> f32; `chunks_exact` ignores a trailing odd byte.
+    let widen = |pair: &[u8]| half::f16::from_le_bytes([pair[0], pair[1]]).to_f32();
+    buf.chunks_exact(2).map(widen).collect()
+}
+
+type EmbeddingVector = Vec; // query embedding as handed to `index.vectors.search` by query_index
+
+#[derive(Debug, Serialize)]
+struct QueryResult { // search response built by query_index and returned by handle_request
+ matches: Vec<(f32, String, String, u64)>, // (distance, filename, filename hash, format bitmask)
+ formats: Vec, // format names; bit i of a match's bitmask refers to formats[i]
+ extensions: HashMap, // key -> `extension` for each entry of image_formats(config); left empty by query_index
+}
+
+#[derive(Debug, Deserialize)]
+struct QueryTerm { // one component of a query; handle_request checks each of the three inputs independently
+ embedding: Option, // precomputed vector, added to the query scaled by `weight`
+ image: Option, // base64 (standard alphabet) encoded image, embedded via the CLIP server
+ text: Option, // text, embedded via the CLIP server
+ weight: Option, // handle_request substitutes 1.0 when absent
+}
+
+#[derive(Debug, Deserialize)]
+struct QueryRequest { // body of a search request, received by handle_request wrapped in `Json`
+ terms: Vec, // contributions that handle_request sums into one query embedding
+ k: Option, // number of results to return; handle_request defaults it to 1000
+}
+
+/// Runs a nearest-neighbour search for `query` and resolves each hit to its file.
+///
+/// Every match is `(distance, filename, filename hash, format bitmask)`. Labels for
+/// which `get()` yields `None` are dropped. `extensions` is left empty here.
+async fn query_index(index: &mut IIndex, query: EmbeddingVector, k: usize) -> Result {
+    let found = index.vectors.search(&query, k as usize)?;
+
+    let mut matches = Vec::new();
+    for (distance, label) in found.distances.into_iter().zip(found.labels) {
+        let id = match label.get() {
+            Some(raw) => raw as usize,
+            None => continue,
+        };
+        let filename = &index.filenames[id];
+        matches.push((distance, filename.clone(), generate_filename_hash(filename).clone(), index.format_codes[id]));
+    }
+
+    Ok(QueryResult {
+        matches,
+        formats: index.format_names.clone(),
+        extensions: HashMap::new(),
+    })
+}
+
+/// Handles one search request: folds all query terms into a single embedding and
+/// returns the `k` nearest indexed files (`k` defaults to 1000).
+///
+/// A term may carry a precomputed `embedding`, a base64 `image` and/or `text`; images
+/// and text are embedded by the CLIP server. Every contribution, including the
+/// server-computed ones, is scaled by the term's `weight` (default 1.0) before being
+/// summed into the query vector.
+async fn handle_request(
+    config: &Config,
+    backend_config: Arc,
+    client: Arc,
+    index: &mut IIndex,
+    req: Json,
+) -> Result> {
+    let mut total_embedding = ndarray::Array::from(vec![0.0; backend_config.embedding_size]);
+
+    let mut image_batch = Vec::new();
+    let mut image_weights = Vec::new();
+    let mut text_batch = Vec::new();
+    let mut text_weights = Vec::new();
+
+    for term in &req.terms {
+        let weight = term.weight.unwrap_or(1.0);
+        if let Some(image) = &term.image {
+            let bytes = BASE64_STANDARD.decode(image)?;
+            let image = Arc::new(tokio::task::block_in_place(|| image::load_from_memory(&bytes))?);
+            image_batch.push(serde_bytes::ByteBuf::from(resize_for_embed(backend_config.clone(), image).await?));
+            image_weights.push(weight);
+        }
+        if let Some(text) = &term.text {
+            text_batch.push(text.clone());
+            text_weights.push(weight);
+        }
+        if let Some(embedding) = &term.embedding {
+            // `zip` stops at the shorter side, so an over-long client vector cannot index out of bounds.
+            for (slot, value) in total_embedding.iter_mut().zip(embedding) {
+                *slot += value * weight;
+            }
+        }
+    }
+
+    // Keep each request paired with its per-item weights so they can be applied to the results.
+    let mut batches = vec![];
+    if !image_batch.is_empty() {
+        batches.push((EmbeddingRequest::Images { images: image_batch }, image_weights));
+    }
+    if !text_batch.is_empty() {
+        batches.push((EmbeddingRequest::Text { text: text_batch }, text_weights));
+    }
+
+    for (batch, weights) in batches {
+        let embs: Vec> = query_clip_server(&client, config, "/", batch).await?;
+        for (emb, weight) in embs.into_iter().zip(weights) {
+            total_embedding += &(ndarray::Array::from_vec(decode_fp16_buffer(&emb)) * weight);
+        }
+    }
+
+    let k = req.k.unwrap_or(1000);
+    let qres = query_index(index, total_embedding.to_vec(), k).await?;
+
+    let mut extensions = HashMap::new();
+    for (k, v) in image_formats(config) {
+        extensions.insert(k, v.extension);
+    }
+
+    Ok(Json(QueryResult {
+        matches: qres.matches,
+        formats: qres.formats,
+        extensions,
+    }).into_response())
+}
+
+async fn get_backend_config(config: &Config) -> Result { // GETs `{clip_server}/config` and decodes the MessagePack body
+    let body = Client::new().get(&format!("{}/config", config.clip_server)).send().await?.bytes().await?;
+    Ok(rmp_serde::from_slice(&body)?)
+}
+
+/// Entry point. Loads the JSON config named by the first CLI argument, applies the
+/// schema, and waits for the CLIP backend to report its configuration. With
+/// `no_run_server` set it ingests once and exits; otherwise it serves search (`POST /`),
+/// health (`GET /`) and `POST /reload`, with a background task re-running ingest and
+/// rebuilding the index whenever a reload is requested.
+#[tokio::main]
+async fn main() -> Result<()> {
+    pretty_env_logger::init();
+
+    let config_path = std::env::args().nth(1).expect("Missing config file path");
+    let config: Arc = Arc::new(serde_json::from_slice(&std::fs::read(config_path)?)?);
+
+    let pool = initialize_database(&config).await?;
+    sqlx::query(SCHEMA).execute(&pool).await?;
+
+    // Retry once per second until the backend answers with its configuration.
+    let backend = Arc::new(loop {
+        match get_backend_config(&config).await {
+            Ok(backend) => break backend,
+            Err(e) => {
+                log::error!("Backend failed (fetch): {}", e);
+                tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+            }
+        }
+    });
+
+    if config.no_run_server {
+        ingest_files(config.clone(), backend.clone()).await?;
+        return Ok(())
+    }
+
+    let (request_ingest_tx, mut request_ingest_rx) = mpsc::channel(1);
+
+    let index = Arc::new(tokio::sync::Mutex::new(build_index(config.clone(), backend.clone()).await?));
+
+    let (ingest_done_tx, _ingest_done_rx) = broadcast::channel(1);
+    let done_tx = Arc::new(ingest_done_tx.clone());
+
+    // Background loop: ingest, rebuild the index, report the outcome, wait for the next request.
+    let _ingest_task = tokio::spawn({
+        let config = config.clone();
+        let backend = backend.clone();
+        let index = index.clone();
+        async move {
+            loop {
+                log::info!("Ingest running");
+                let outcome = match ingest_files(config.clone(), backend.clone()).await {
+                    Ok(_) => match build_index(config.clone(), backend.clone()).await {
+                        Ok(new_index) => {
+                            *index.lock().await = new_index;
+                            (true, "OK".to_string())
+                        }
+                        Err(e) => {
+                            log::error!("Index build failed: {:?}", e);
+                            (false, format!("{:?}", e))
+                        }
+                    },
+                    Err(e) => {
+                        log::error!("Ingest failed: {:?}", e);
+                        (false, format!("{:?}", e))
+                    }
+                };
+                // Publish exactly one status per run: the channel holds a single message, so
+                // a trailing "OK" after a failure would overwrite the real error for waiters.
+                ingest_done_tx.send(outcome).unwrap();
+                request_ingest_rx.recv().await;
+            }
+        }
+    });
+
+    let cors = CorsLayer::permissive();
+
+    let config_ = config.clone();
+    let client = Arc::new(Client::new());
+    let app = Router::new()
+        .route("/", post(|req| async move {
+            let config = config.clone();
+            let backend_config = backend.clone();
+            let mut index = index.lock().await; // TODO: use ConcurrentIndex here
+            let client = client.clone();
+            handle_request(&config, backend_config, client.clone(), &mut index, req).await.map_err(|e| format!("{:?}", e))
+        }))
+        .route("/", get(|_req: axum::http::Request| async move {
+            "OK"
+        }))
+        .route("/reload", post(|_req: axum::http::Request| async move {
+            log::info!("Requesting index reload");
+            let mut done_rx = done_tx.clone().subscribe();
+            let _ = request_ingest_tx.send(()).await; // ignore possible error, which is presumably because the queue is full
+            // A receive error means the channel closed or this receiver lagged behind.
+            let (code, body) = match done_rx.recv().await {
+                Ok((true, status)) => (StatusCode::OK, status),
+                Ok((false, status)) => (StatusCode::INTERNAL_SERVER_ERROR, status),
+                Err(_) => (StatusCode::INTERNAL_SERVER_ERROR, "internal error".to_string()),
+            };
+            let mut res = body.into_response();
+            *res.status_mut() = code;
+            res
+        }))
+        .layer(cors);
+
+    let addr = format!("0.0.0.0:{}", config_.port);
+    log::info!("Starting server on {}", addr);
+    let listener = tokio::net::TcpListener::bind(&addr).await?;
+    axum::serve(listener, app).await?;
+
+    Ok(())
+}
\ No newline at end of file
diff --git a/src/ocr.rs b/src/ocr.rs
new file mode 100644
index 0000000..90e523e
--- /dev/null
+++ b/src/ocr.rs
@@ -0,0 +1,173 @@
+use anyhow::{anyhow, Result};
+use image::{DynamicImage, GenericImageView, ImageFormat};
+use regex::Regex;
+use reqwest::{
+ header::{HeaderMap, HeaderValue},
+ multipart::{Form, Part},
+ Client,
+};
+use serde_json::Value;
+use std::{io::Cursor, time::{SystemTime, UNIX_EPOCH}};
+use serde::{Deserialize, Serialize};
+
+const CALLBACK_REGEX: &str = r">AF_initDataCallback\((\{key: 'ds:1'.*?\})\);"; // group 1 is the 'ds:1' callback object that scan_image_chunk parses as JSON5
+const MAX_DIM: u32 = 1024; // scan_image downscales wider images to this width and cuts them into strips at most this tall
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct SegmentCoords { // pixel rectangle of a text segment, as produced by rationalize_coords_format1
+ pub x: i32, // left edge (box centre minus half its width)
+ pub y: i32, // top edge; scan_image adds the strip's vertical offset to it
+ pub w: i32, // width
+ pub h: i32, // height
+}
+
+#[derive(Debug, Deserialize, Serialize)]
+pub struct Segment { // one recognised piece of text together with its location
+ pub coords: SegmentCoords, // rectangle paired with the text by position in scan_image_chunk
+ pub text: String, // the recognised text
+}
+
+pub type ScanResult = Vec; // collection that scan_image accumulates its segments into
+
+fn rationalize_coords_format1(
+    image_w: f64,
+    image_h: f64,
+    center_x_fraction: f64,
+    center_y_fraction: f64,
+    width_fraction: f64,
+    height_fraction: f64,
+) -> SegmentCoords {
+    // Turns a box given by centre and size (each multiplied by the image dimensions
+    // below) into a rectangle with a top-left origin, rounded to whole units.
+    let left = center_x_fraction - width_fraction / 2.0;
+    let top = center_y_fraction - height_fraction / 2.0;
+    let px = |fraction: f64, extent: f64| (fraction * extent).round() as i32;
+    SegmentCoords { x: px(left, image_w), y: px(top, image_h), w: px(width_fraction, image_w), h: px(height_fraction, image_h) }
+}
+
+/// Uploads one PNG-encoded chunk to Google Lens and extracts its text segments.
+///
+/// `image_width` / `image_height` are the chunk's pixel dimensions; they scale the
+/// fractional boxes in the response to pixel coordinates. A response that does not
+/// have the expected shape produces an `Err` rather than a panic, so a caller can log
+/// the failure and carry on with other images.
+async fn scan_image_chunk(
+    client: &Client,
+    image: &[u8],
+    image_width: u32,
+    image_height: u32,
+) -> Result {
+    // Microsecond timestamp reused for the upload file name, the cookie and the query string.
+    let timestamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_micros();
+
+    let part = Part::bytes(image.to_vec())
+        .file_name(format!("ocr{}.png", timestamp))
+        .mime_str("image/png")?;
+
+    let form = Form::new().part("encoded_image", part);
+
+    let mut headers = HeaderMap::new();
+    headers.insert(
+        "User-Agent",
+        HeaderValue::from_static("Mozilla/5.0 (Linux; Android 13; RMX3771) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.144 Mobile Safari/537.36"),
+    );
+    headers.insert("Cookie", HeaderValue::from_str(&format!("SOCS=CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg; stcs={}", timestamp))?);
+
+    let response = client
+        .post(&format!("https://lens.google.com/v3/upload?stcs={}", timestamp))
+        .multipart(form)
+        .headers(headers)
+        .send()
+        .await?;
+
+    let body = response.text().await?;
+
+    let re = Regex::new(CALLBACK_REGEX)?;
+    let captures = re
+        .captures(&body)
+        .ok_or_else(|| anyhow!("invalid API response"))?;
+    let match_str = captures.get(1).unwrap().as_str();
+
+    let lens_object: Value = json5::from_str(match_str)?;
+
+    if lens_object.get("errorHasStatus").is_some() {
+        return Err(anyhow!("lens failed"));
+    }
+
+    // Indexing a `Value` never panics (a missing path yields `Null`), unlike indexing a `Vec`.
+    let data = &lens_object["data"];
+    let text_segments_raw = data[3][4][0][0]
+        .as_array()
+        .ok_or_else(|| anyhow!("invalid text segments"))?;
+    let text_regions_raw = data[2][3][0]
+        .as_array()
+        .ok_or_else(|| anyhow!("invalid text regions"))?;
+
+    let mut text_regions = Vec::new();
+    for region in text_regions_raw {
+        let kind = region[11].as_str().ok_or_else(|| anyhow!("invalid text region"))?;
+        if !kind.starts_with("text:") {
+            continue;
+        }
+        let coord = |i: usize| region[1][i].as_f64().ok_or_else(|| anyhow!("invalid region coordinates"));
+        text_regions.push(rationalize_coords_format1(
+            image_width as f64,
+            image_height as f64,
+            coord(0)?,
+            coord(1)?,
+            coord(2)?,
+            coord(3)?,
+        ));
+    }
+
+    let mut text_segments = Vec::new();
+    for segment in text_segments_raw {
+        let text = segment.as_str().ok_or_else(|| anyhow!("invalid text segment"))?;
+        text_segments.push(text.to_string());
+    }
+
+    // Texts and boxes are paired by position; `zip` stops at the shorter list.
+    Ok(text_segments.into_iter().zip(text_regions).map(|(text, coords)| Segment { text, coords }).collect())
+}
+
+pub async fn scan_image(client: &Client, image: &DynamicImage) -> Result { // OCRs `image` strip by strip via scan_image_chunk and merges the segments
+ let mut result = ScanResult::new();
+ let (width, height) = image.dimensions();
+
+ let (width, height, image) = if width > MAX_DIM { // only images wider than MAX_DIM are resized
+ let height = ((height as f64) * (MAX_DIM as f64) / (width as f64)).round() as u32; // scale height by the same ratio as the width
+ let new_image = tokio::task::block_in_place(|| image.resize_exact(MAX_DIM, height, image::imageops::FilterType::Lanczos3)); // resize runs inside block_in_place
+ (MAX_DIM, height, std::borrow::Cow::Owned(new_image))
+ } else {
+ (width, height, std::borrow::Cow::Borrowed(image)) // narrow enough: borrow the caller's image as-is
+ };
+
+ let mut y = 0; // top row of the next strip
+ while y < height {
+ let chunk_height = (height - y).min(MAX_DIM); // the final strip may be shorter than MAX_DIM
+ let chunk = tokio::task::block_in_place(|| {
+ let chunk = image.view(0, y, width, chunk_height).to_image(); // copy the full-width strip out of the image
+ let mut buf = Vec::new();
+ let mut csr = Cursor::new(&mut buf);
+ chunk.write_to(&mut csr, ImageFormat::Png)?; // encode the strip as PNG into `buf`
+ Ok::, anyhow::Error>(buf)
+ })?;
+
+ let res = scan_image_chunk(client, &chunk, width, chunk_height).await?; // a failed strip aborts the whole scan
+ for segment in res {
+ result.push(Segment {
+ text: segment.text,
+ coords: SegmentCoords {
+ y: segment.coords.y + y as i32, // strip-relative y -> y within the (possibly resized) image
+ x: segment.coords.x,
+ w: segment.coords.w,
+ h: segment.coords.h,
+ },
+ });
+ }
+
+ y += chunk_height;
+ }
+
+ Ok(result)
+}
\ No newline at end of file