From 7cb42e028f085a91303a9707e4b741b9f50dcc8e Mon Sep 17 00:00:00 2001 From: osmarks Date: Tue, 21 May 2024 00:09:04 +0100 Subject: [PATCH] Rewrite entire application (well, backend) in Rust and also Go I decided I wanted to integrate the experimental OCR thing better, so I rewrote in Go and also integrated the thumbnailer. However, Go is a bad language and I only used it out of spite. It turned out to have a very hard-to-fix memory leak due to some unclear interaction between libvips and both sets of bindings I tried, so I had Claude-3 transpile it to Rust then spent a while fixing the several mistakes it made and making tweaks. The new Rust version works, although I need to actually do something with the OCR data and make the index queryable concurrently. --- .gitignore | 5 +- ...8c417e06d7c4e53e1ec542243ccf2808bbab7.json | 12 + ...5a299878bc0fe4af582c6791a411741ee41d9.json | 12 + ...e5b5ef87958bec4d6db6bd06660b71f7a1ad0.json | 12 + ...62df98ee2fcbe8df11f7db18a3647b2e0f1a2.json | 12 + ...55e0f6503fddd369f2c3031f39c0759bb97a0.json | 62 + ...5c6d8631e48be9166c02b593020a17fcf2686.json | 12 + Cargo.lock | 3320 +++++++++++++++++ Cargo.toml | 34 + clipfront2/src/App.svelte | 27 +- clipfront2/src/build.js | 2 +- clipfront2/src/util.js | 17 +- frontend_config.json | 6 +- misc/bad-go-version/go.mod | 26 + misc/bad-go-version/go.sum | 100 + misc/bad-go-version/meme_search.go | 877 +++++ misc/bad-go-version/ocr.go | 264 ++ misc/bad-go-version/problematic_thing.go | 891 +++++ misc/bad-go-version/problematic_thing_2.go | 265 ++ mse.py | 112 +- mse_config.json | 9 +- ocr.py | 101 + src/main.rs | 892 +++++ src/ocr.rs | 173 + 24 files changed, 7192 insertions(+), 51 deletions(-) create mode 100644 .sqlx/query-0d5b91c01acf72be0cd78f1a0c58c417e06d7c4e53e1ec542243ccf2808bbab7.json create mode 100644 .sqlx/query-63edaa9692deb1a9fb17d9e16905a299878bc0fe4af582c6791a411741ee41d9.json create mode 100644 .sqlx/query-b6803e2443445de725290dde85de5b5ef87958bec4d6db6bd06660b71f7a1ad0.json 
create mode 100644 .sqlx/query-bed71d48c691bff7464b1aa767162df98ee2fcbe8df11f7db18a3647b2e0f1a2.json create mode 100644 .sqlx/query-ec2da4ab11ede7a9a468ff3a50c55e0f6503fddd369f2c3031f39c0759bb97a0.json create mode 100644 .sqlx/query-ee6eca5b34c3fbf76cd10932db35c6d8631e48be9166c02b593020a17fcf2686.json create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 misc/bad-go-version/go.mod create mode 100644 misc/bad-go-version/go.sum create mode 100644 misc/bad-go-version/meme_search.go create mode 100644 misc/bad-go-version/ocr.go create mode 100644 misc/bad-go-version/problematic_thing.go create mode 100644 misc/bad-go-version/problematic_thing_2.go create mode 100644 ocr.py create mode 100644 src/main.rs create mode 100644 src/ocr.rs diff --git a/.gitignore b/.gitignore index a0caf95..536110d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,7 @@ meme-rater/images/ meme-rater/meta/ meme-rater/*.sqlite3* meme-rater/deploy_for_training.sh -node_modules/* \ No newline at end of file +node_modules/* +node_modules +*sqlite3* +thumbtemp \ No newline at end of file diff --git a/.sqlx/query-0d5b91c01acf72be0cd78f1a0c58c417e06d7c4e53e1ec542243ccf2808bbab7.json b/.sqlx/query-0d5b91c01acf72be0cd78f1a0c58c417e06d7c4e53e1ec542243ccf2808bbab7.json new file mode 100644 index 0000000..69b49d9 --- /dev/null +++ b/.sqlx/query-0d5b91c01acf72be0cd78f1a0c58c417e06d7c4e53e1ec542243ccf2808bbab7.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "INSERT OR IGNORE INTO files (filename) VALUES (?)", + "describe": { + "columns": [], + "parameters": { + "Right": 1 + }, + "nullable": [] + }, + "hash": "0d5b91c01acf72be0cd78f1a0c58c417e06d7c4e53e1ec542243ccf2808bbab7" +} diff --git a/.sqlx/query-63edaa9692deb1a9fb17d9e16905a299878bc0fe4af582c6791a411741ee41d9.json b/.sqlx/query-63edaa9692deb1a9fb17d9e16905a299878bc0fe4af582c6791a411741ee41d9.json new file mode 100644 index 0000000..f6da11d --- /dev/null +++ 
b/.sqlx/query-63edaa9692deb1a9fb17d9e16905a299878bc0fe4af582c6791a411741ee41d9.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "UPDATE files SET ocr = ?, raw_ocr_segments = ?, ocr_time = ? WHERE filename = ?", + "describe": { + "columns": [], + "parameters": { + "Right": 4 + }, + "nullable": [] + }, + "hash": "63edaa9692deb1a9fb17d9e16905a299878bc0fe4af582c6791a411741ee41d9" +} diff --git a/.sqlx/query-b6803e2443445de725290dde85de5b5ef87958bec4d6db6bd06660b71f7a1ad0.json b/.sqlx/query-b6803e2443445de725290dde85de5b5ef87958bec4d6db6bd06660b71f7a1ad0.json new file mode 100644 index 0000000..208e746 --- /dev/null +++ b/.sqlx/query-b6803e2443445de725290dde85de5b5ef87958bec4d6db6bd06660b71f7a1ad0.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "UPDATE files SET thumbnails = ?, thumbnail_time = ? WHERE filename = ?", + "describe": { + "columns": [], + "parameters": { + "Right": 3 + }, + "nullable": [] + }, + "hash": "b6803e2443445de725290dde85de5b5ef87958bec4d6db6bd06660b71f7a1ad0" +} diff --git a/.sqlx/query-bed71d48c691bff7464b1aa767162df98ee2fcbe8df11f7db18a3647b2e0f1a2.json b/.sqlx/query-bed71d48c691bff7464b1aa767162df98ee2fcbe8df11f7db18a3647b2e0f1a2.json new file mode 100644 index 0000000..d91e95a --- /dev/null +++ b/.sqlx/query-bed71d48c691bff7464b1aa767162df98ee2fcbe8df11f7db18a3647b2e0f1a2.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "UPDATE files SET embedding_time = ?, embedding = ? 
WHERE filename = ?", + "describe": { + "columns": [], + "parameters": { + "Right": 3 + }, + "nullable": [] + }, + "hash": "bed71d48c691bff7464b1aa767162df98ee2fcbe8df11f7db18a3647b2e0f1a2" +} diff --git a/.sqlx/query-ec2da4ab11ede7a9a468ff3a50c55e0f6503fddd369f2c3031f39c0759bb97a0.json b/.sqlx/query-ec2da4ab11ede7a9a468ff3a50c55e0f6503fddd369f2c3031f39c0759bb97a0.json new file mode 100644 index 0000000..b362e6b --- /dev/null +++ b/.sqlx/query-ec2da4ab11ede7a9a468ff3a50c55e0f6503fddd369f2c3031f39c0759bb97a0.json @@ -0,0 +1,62 @@ +{ + "db_name": "SQLite", + "query": "SELECT * FROM files WHERE filename = ?", + "describe": { + "columns": [ + { + "name": "filename", + "ordinal": 0, + "type_info": "Text" + }, + { + "name": "embedding_time", + "ordinal": 1, + "type_info": "Int64" + }, + { + "name": "ocr_time", + "ordinal": 2, + "type_info": "Int64" + }, + { + "name": "thumbnail_time", + "ordinal": 3, + "type_info": "Int64" + }, + { + "name": "embedding", + "ordinal": 4, + "type_info": "Blob" + }, + { + "name": "ocr", + "ordinal": 5, + "type_info": "Text" + }, + { + "name": "raw_ocr_segments", + "ordinal": 6, + "type_info": "Blob" + }, + { + "name": "thumbnails", + "ordinal": 7, + "type_info": "Blob" + } + ], + "parameters": { + "Right": 1 + }, + "nullable": [ + false, + true, + true, + true, + true, + true, + true, + true + ] + }, + "hash": "ec2da4ab11ede7a9a468ff3a50c55e0f6503fddd369f2c3031f39c0759bb97a0" +} diff --git a/.sqlx/query-ee6eca5b34c3fbf76cd10932db35c6d8631e48be9166c02b593020a17fcf2686.json b/.sqlx/query-ee6eca5b34c3fbf76cd10932db35c6d8631e48be9166c02b593020a17fcf2686.json new file mode 100644 index 0000000..ecedbf9 --- /dev/null +++ b/.sqlx/query-ee6eca5b34c3fbf76cd10932db35c6d8631e48be9166c02b593020a17fcf2686.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "DELETE FROM files WHERE filename = ?", + "describe": { + "columns": [], + "parameters": { + "Right": 1 + }, + "nullable": [] + }, + "hash": 
"ee6eca5b34c3fbf76cd10932db35c6d8631e48be9166c02b593020a17fcf2686" +} diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..543b422 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,3320 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "aligned-vec" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1" + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "arbitrary" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" + +[[package]] +name = "arg_enum_proc_macro" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "async-trait" +version = "0.1.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "av1-grain" +version = "0.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6678909d8c5d46a42abcf571271e15fdbc0a225e3646cf23762cd415046c78bf" +dependencies = [ + "anyhow", + "arrayvec", + "log", + "nom", + "num-rational", + "v_frame", +] + +[[package]] +name = "avif-serialize" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876c75a42f6364451a033496a14c44bffe41f5f4a8236f697391f11024e596d2" +dependencies = [ + "arrayvec", +] + +[[package]] +name = "axum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper 1.0.1", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 0.1.2", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "backtrace" +version = "0.3.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64ct" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" + +[[package]] +name = "bit_field" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +dependencies = [ + "serde", +] + +[[package]] +name = "bitstream-io" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c12d1856e42f0d817a835fe55853957c85c8c8a470114029143d3f12671446e" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "built" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41bfbdb21256b87a8b5e80fab81a8eed158178e812fd7ba451907518b2742f16" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytemuck" +version = "1.16.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + +[[package]] +name = "bytes" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" + +[[package]] +name = "cc" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] + +[[package]] +name = "cfg-expr" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02" +dependencies = [ + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets 0.52.5", +] + +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + +[[package]] +name = 
"const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + +[[package]] +name = "crc32fast" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "der" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "const-oid", + "crypto-common", + "subtle", +] + +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "either" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +dependencies = [ + "serde", +] + +[[package]] 
+name = "encoding_rs" +version = "0.8.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "env_logger" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" +dependencies = [ + "humantime", + "is-terminal", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + +[[package]] +name = "exr" +version = "1.72.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "887d93f60543e9a9362ef8a21beedd0a833c5d9610e18c67abe15a5963dcb1a4" +dependencies = [ + "bit_field", + "flume", + "half", + "lebe", + "miniz_oxide", + "rayon-core", + "smallvec", + "zune-inflate", +] + +[[package]] +name = "faiss" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3ffe048432786028b0a30aa1d13e10e08ced380439ba4a83fe5c227d2dd9733" +dependencies = [ + "faiss-sys", +] + 
+[[package]] +name = "faiss-sys" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9c008fc56422bf34357f17226d9c5a5c2ef6245b4774759c5f67112e46915e" + +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + +[[package]] +name = "fdeflate" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f9bfee30e4dedf0ab8b422f03af778d9612b63f502710fc500a334ebe2de645" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "finl_unicode" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" + +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "flume" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" +dependencies = [ + "futures-core", + "futures-sink", + "spin 0.9.8", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gif" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb2d69b19215e18bb912fa30f7ce15846e301408695e44e0ef719f1da9e19f2" +dependencies = [ + "color_quant", + "weezl", +] + +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "h2" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = 
"2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" 
+version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "hyper" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] 
+name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "pin-project-lite", + "socket2", + "tokio", + "tower", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "image" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd54d660e773627692c524beaad361aca785a4f9f5730ce91f42aabe5bce3d11" +dependencies = [ + "bytemuck", + "byteorder", + "color_quant", + "exr", + "gif", + "image-webp", + "num-traits", + "png", + "qoi", + "ravif", + "rayon", + "rgb", + "tiff", + "zune-core", + "zune-jpeg", +] + +[[package]] 
+name = "image-webp" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d730b085583c4d789dfd07fdcf185be59501666a90c97c40162b37e4fdad272d" +dependencies = [ + "byteorder-lite", + "thiserror", +] + +[[package]] +name = "imgref" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44feda355f4159a7c757171a77de25daf6411e217b4cabd03bd6650690468126" + +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "interpolate_name" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "ipnet" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" + +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + +[[package]] +name = "jpeg-decoder" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "json5" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1" +dependencies = [ + "pest", + "pest_derive", + "serde", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +dependencies = [ + "winapi", + "winapi-build", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +dependencies = [ + "spin 0.5.2", +] + +[[package]] +name = "lebe" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7" +dependencies = [ + "arbitrary", + "cc", + "once_cell", +] + +[[package]] +name = "libm" +version = "0.2.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "libsqlite3-sys" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4e226dcd58b4be396f7bd3c20da8fdee2911400705297ba7d2d7cc2c30f716" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "loop9" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062" +dependencies = [ + "imgref", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "matrixmultiply" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "maybe-rayon" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519" +dependencies = [ + "cfg-if", + "rayon", +] + +[[package]] +name = "md-5" 
+version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "meme-search-engine" +version = "0.1.0" +dependencies = [ + "anyhow", + "axum", + "base64 0.22.1", + "chrono", + "faiss", + "fnv", + "futures-util", + "half", + "image", + "json5", + "log", + "ndarray", + "num_cpus", + "pretty_env_logger", + "regex", + "reqwest", + "rmp-serde", + "serde", + "serde_bytes", + "serde_json", + "sqlx", + "tokio", + "tokio-stream", + "tower", + "tower-http", + "walkdir", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "mime_guess" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" +dependencies = [ + "mime", + "unicase", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +dependencies = [ + "adler", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.48.0", +] + 
+[[package]] +name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "noop_proc_macro" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8" + +[[package]] +name = "num-bigint" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-complex" 
+version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" 
+version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "openssl" +version = "0.10.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +dependencies = [ + "bitflags 2.5.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "parking_lot" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.5.1", + "smallvec", + "windows-targets 0.52.5", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pest" +version = "2.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "560131c633294438da9f7c4b08189194b20946c8274c6b9e38881a7874dc8ee8" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26293c9193fbca7b1a3bf9b79dc1e388e927e6cacaa78b4a3ab705a1d3d41459" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ec22af7d3fb470a85dd2ca96b7c577a1eb4ef6f1683a9fe9a8c16e136c04687" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "pest_meta" +version = "2.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a240022f37c361ec1878d646fc5b7d7c4d28d5946e1a80ad5a7a4f4ca0bdcd" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + +[[package]] +name = "pin-project" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "png" +version = "0.17.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06e4b0d3d1312775e782c86c91a111aa1f910cbb65e1337f9975b5f9a554b5e1" +dependencies = [ + "bitflags 1.3.2", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "pretty_env_logger" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" +dependencies = [ + "env_logger", + "log", +] + +[[package]] +name = 
"proc-macro2" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "profiling" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d84d1d7a6ac92673717f9f6d1518374ef257669c24ebc5ac25d5033828be58" +dependencies = [ + "profiling-procmacros", +] + +[[package]] +name = "profiling-procmacros" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd" +dependencies = [ + "quote", + "syn 2.0.65", +] + +[[package]] +name = "qoi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rav1e" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9" +dependencies = [ + "arbitrary", + "arg_enum_proc_macro", + "arrayvec", + "av1-grain", + "bitstream-io", + "built", + "cfg-if", + "interpolate_name", + "itertools", + "libc", + "libfuzzer-sys", + "log", + "maybe-rayon", + "new_debug_unreachable", + "noop_proc_macro", + "num-derive", + "num-traits", + "once_cell", + "paste", + "profiling", + "rand", + "rand_chacha", + "simd_helpers", + "system-deps", + "thiserror", + "v_frame", + "wasm-bindgen", +] + +[[package]] +name = "ravif" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc13288f5ab39e6d7c9d501759712e6969fcc9734220846fc9ed26cae2cc4234" +dependencies = [ + "avif-serialize", + "imgref", + "loop9", + "quick-error", + "rav1e", + "rayon", + "rgb", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags 2.5.0", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "reqwest" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" +dependencies = [ + "base64 0.22.1", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-tls", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "mime_guess", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 0.1.2", + "system-configuration", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + +[[package]] +name = "rgb" +version = "0.8.37" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "rmp" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" +dependencies = [ + "byteorder", + "num-traits", + "paste", +] + +[[package]] +name = "rmp-serde" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" +dependencies = [ + "byteorder", + "rmp", + "serde", +] + +[[package]] +name = "rsa" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core", + "signature", + "spki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls-pemfile" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +dependencies = [ + "base64 0.22.1", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" + +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d931a44fdaa43b8637009e7632a02adc4f2b2e0733c08caa4cf00e8da4a117a7" +dependencies = [ + "kernel32-sys", + "winapi", +] + +[[package]] +name = "schannel" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "security-framework" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +dependencies = [ + "bitflags 2.5.0", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "serde" +version = "1.0.202" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" +dependencies = [ + "serde_derive", +] + 
+[[package]] +name = "serde_bytes" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b8497c313fd43ab992087548117643f6fcd935cbf36f176ffda0aacf9591734" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.202" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6" +dependencies = [ + "itoa", + "serde", +] + +[[package]] +name = "serde_spanned" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + 
"cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core", +] + +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + +[[package]] +name = "simd_helpers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6" +dependencies = [ + "quote", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "sqlformat" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" +dependencies = [ + "itertools", + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlx" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9a2ccff1a000a5a59cd33da541d9f2fdcd9e6e8229cc200565942bff36d0aaa" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24ba59a9342a3d9bab6c56c118be528b27c9b60e490080e9711a04dccac83ef6" +dependencies = [ + "ahash", + "atoi", + "byteorder", + "bytes", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-channel", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashlink", + "hex", + "indexmap", + "log", + "memchr", + "once_cell", + "paste", + "percent-encoding", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlformat", + "thiserror", + "tokio", + "tokio-stream", + "tracing", + "url", +] + +[[package]] +name = "sqlx-macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea40e2345eb2faa9e1e5e326db8c34711317d2b5e08d0d5741619048a803127" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn 1.0.109", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.7.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8" +dependencies = [ + "dotenvy", + "either", + "heck 0.4.1", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-sqlite", + "syn 1.0.109", + "tempfile", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ed31390216d20e538e447a7a9b959e06ed9fc51c37b514b46eb758016ecd418" +dependencies = [ + "atoi", + "base64 0.21.7", + "bitflags 2.5.0", + "byteorder", + "bytes", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c824eb80b894f926f89a0b9da0c7f435d27cdd35b8c655b114e58223918577e" +dependencies = [ + "atoi", + "base64 0.21.7", + "bitflags 2.5.0", + "byteorder", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa" +dependencies = [ + "atoi", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + 
"futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "sqlx-core", + "tracing", + "url", + "urlencoding", +] + +[[package]] +name = "stringprep" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb41d74e231a107a1b4ee36bd1214b11285b77768d2e3824aedafa988fd36ee6" +dependencies = [ + "finl_unicode", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "sync_wrapper" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "system-deps" +version = "6.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e535eb8dded36d55ec13eddacd30dec501792ff23a0b1682c38601b8cf2349" +dependencies = [ + "cfg-expr", + "heck 0.5.0", + "pkg-config", + "toml", + "version-compare", +] + +[[package]] +name = "target-lexicon" +version = "0.12.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "tiff" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e" +dependencies = [ + "flate2", + "jpeg-decoder", + "weezl", +] + +[[package]] 
+name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-macros" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml" +version = "0.8.13" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" +dependencies = [ + "bitflags 2.5.0", + "bytes", + "http", + "http-body", + "http-body-util", + "pin-project-lite", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + +[[package]] +name = "unicase" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" +dependencies = [ + "version_check", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.23" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + +[[package]] +name = "v_frame" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f32aaa24bacd11e488aa9ba66369c7cd514885742c9fe08cfe85884db3e92b" +dependencies = [ + "aligned-vec", + "num-traits", + "wasm-bindgen", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version-compare" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "1.0.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb08f9e670fab86099470b97cd2b252d6527f0b3cc1401acdb595ffc9dd288ff" +dependencies = [ + "kernel32-sys", + "same-file", + "winapi", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.65", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", 
+] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "web-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "weezl" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" + +[[package]] +name = "whoami" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9" +dependencies = [ + "redox_syscall 0.4.1", + "wasite", +] + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" + +[[package]] +name = "winapi-util" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" 
+version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "winnow" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3c52e9c97a68071b23e836c9380edae937f17b9c4667bd021973efc689f618d" +dependencies = [ + "memchr", +] + +[[package]] +name = "winreg" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "zerocopy" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.65", +] + +[[package]] +name = "zeroize" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" + 
+[[package]] +name = "zune-core" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" + +[[package]] +name = "zune-inflate" +version = "0.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "zune-jpeg" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec866b44a2a1fd6133d363f073ca1b179f438f99e7e5bfb1e33f7181facfe448" +dependencies = [ + "zune-core", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f84055e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "meme-search-engine" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +tokio = { version = "1", features = ["full"] } +axum = "0.7" +image = { version = "0.25", features = ["avif"] } +reqwest = { version = "0.12", features = ["multipart"] } +serde = { version = "1", features = ["derive"] } +sqlx = { version = "0.7", features = ["runtime-tokio", "sqlite"] } +walkdir = "1" +log = "0.4" +rmp-serde = "1" +serde_json = "1" +chrono = "0.4" +base64 = "0.22" +anyhow = "1" +fnv = "1" +faiss = "0.12" +ndarray = "0.15" +half = { version = "2" } +regex = "1" +pretty_env_logger = "0.5" +futures-util = "0.3" +tokio-stream = "0.1" +num_cpus = "1" +serde_bytes = "0.11" +tower-http = { version = "0.5", features = ["cors"] } +tower = "0.4" +json5 = "0.4" \ No newline at end of file diff --git a/clipfront2/src/App.svelte b/clipfront2/src/App.svelte index f0e637a..fd7d042 100644 --- a/clipfront2/src/App.svelte +++ b/clipfront2/src/App.svelte @@ -62,6 +62,8 @@ .result border: 1px solid gray + * + display: block .result img width: 100% @@ -109,17 +111,22 @@ {/if} {#if 
results} + {#if displayedResults.length === 0} + No results. Wait for index rebuild. + {/if} {#each displayedResults as result} {#key result.file} @@ -171,9 +178,7 @@ let displayedResults = [] const runSearch = async () => { if (!resultPromise) { - let args = {} - args.text = queryTerms.filter(x => x.type === "text" && x.text).map(({ text, weight, sign }) => [ text, weight * { "+": 1, "-": -1 }[sign] ]) - args.images = queryTerms.filter(x => x.type === "image").map(({ imageData, weight, sign }) => [ imageData, weight * { "+": 1, "-": -1 }[sign] ]) + let args = {"terms": queryTerms.map(x => ({ image: x.imageData, text: x.text, weight: x.weight * { "+": 1, "-": -1 }[x.sign] }))} resultPromise = util.doQuery(args).then(res => { error = null results = res @@ -181,7 +186,8 @@ displayedResults = [] pendingImageLoads = 0 for (let i = 0; i < chunkSize; i++) { - displayedResults.push(results[i]) + if (i >= results.matches.length) break + displayedResults.push(results.matches[i]) pendingImageLoads += 1 } redrawGrid() @@ -195,7 +201,8 @@ if (window.scrollY + window.innerHeight < heightThreshold) return; let init = displayedResults.length for (let i = 0; i < chunkSize; i++) { - displayedResults.push(results[init + i]) + if (init + i >= results.matches.length) break + displayedResults.push(results.matches[init + i]) pendingImageLoads += 1 } displayedResults = displayedResults diff --git a/clipfront2/src/build.js b/clipfront2/src/build.js index 6037466..2ade5ba 100644 --- a/clipfront2/src/build.js +++ b/clipfront2/src/build.js @@ -7,7 +7,7 @@ esbuild .build({ entryPoints: [path.join(__dirname, "app.js")], bundle: true, - minify: true, + minify: false, outfile: path.join(__dirname, "../static/app.js"), plugins: [sveltePlugin({ preprocess: { diff --git a/clipfront2/src/util.js b/clipfront2/src/util.js index 502916c..50fc4c7 100644 --- a/clipfront2/src/util.js +++ b/clipfront2/src/util.js @@ -1,7 +1,8 @@ import * as config from "../../frontend_config.json" +import * as 
backendConfig from "../../mse_config.json" import * as formats from "../../formats.json" -export const getURL = x => config.image_path + x +export const getURL = x => config.image_path + x[1] export const doQuery = args => fetch(config.backend_url, { method: "POST", @@ -11,15 +12,11 @@ export const doQuery = args => fetch(config.backend_url, { body: JSON.stringify(args) }).then(x => x.json()) -const filesafeCharset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-" -export const thumbnailPath = (originalPath, format) => { - const extension = formats.formats[format][0] - // Python and JS have minor differences in string handling wrt. astral characters which could result in incorrect quantities of dashes. Fortunately, Array.from handles this correctly. - return config.thumb_path + `${Array.from(originalPath).map(x => filesafeCharset.includes(x) ? x : "_").join("")}.${format}${extension}` +export const hasFormat = (results, result, format) => { + return result[3] && (1 << results.formats.indexOf(format)) !== 0 } -const thumbedExtensions = formats.extensions -export const hasThumbnails = t => { - const parts = t.split(".") - return thumbedExtensions.includes("." 
+ parts[parts.length - 1]) +export const thumbnailURL = (results, result, format) => { + console.log("RES", results) + return `${config.thumb_path}${result[2]}${format}.${results.extensions[format]}` } \ No newline at end of file diff --git a/frontend_config.json b/frontend_config.json index 2eb8b12..3406307 100644 --- a/frontend_config.json +++ b/frontend_config.json @@ -1,5 +1,5 @@ { - "backend_url": "https://mse.osmarks.net/backend", - "image_path": "https://i2.osmarks.net/memes-or-something/", - "thumb_path": "https://i2.osmarks.net/thumbs/memes-or-something_" + "backend_url": "http://localhost:1707/", + "image_path": "http://localhost:7858/", + "thumb_path": "http://localhost:7857/" } \ No newline at end of file diff --git a/misc/bad-go-version/go.mod b/misc/bad-go-version/go.mod new file mode 100644 index 0000000..5dd9b45 --- /dev/null +++ b/misc/bad-go-version/go.mod @@ -0,0 +1,26 @@ +module meme-search + +go 1.22.2 + +require ( + github.com/DataIntelligenceCrew/go-faiss v0.2.0 + github.com/jmoiron/sqlx v1.4.0 + github.com/mattn/go-sqlite3 v1.14.22 + github.com/samber/lo v1.39.0 + github.com/titanous/json5 v1.0.0 + github.com/vmihailenco/msgpack v4.0.4+incompatible + github.com/x448/float16 v0.8.4 + golang.org/x/sync v0.7.0 +) + +require ( + github.com/davidbyttow/govips/v2 v2.14.0 // indirect + github.com/golang/protobuf v1.5.2 // indirect + github.com/h2non/bimg v1.1.9 // indirect + golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect + golang.org/x/image v0.16.0 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/text v0.15.0 // indirect + google.golang.org/appengine v1.6.8 // indirect + google.golang.org/protobuf v1.26.0 // indirect +) diff --git a/misc/bad-go-version/go.sum b/misc/bad-go-version/go.sum new file mode 100644 index 0000000..02a29a2 --- /dev/null +++ b/misc/bad-go-version/go.sum @@ -0,0 +1,100 @@ +filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= 
+github.com/DataIntelligenceCrew/go-faiss v0.2.0 h1:c0pxAr0vldXIuE4DZnqpl6FuuH1uZd45d+NiQHKg1uU= +github.com/DataIntelligenceCrew/go-faiss v0.2.0/go.mod h1:4Gi7G3PF78IwZigTL2M1AJXOaAgxyL66vCqUYVaNgwk= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davidbyttow/govips/v2 v2.14.0 h1:il3pX0XMZ5nlwipkFJHRZ3vGzcdXWApARalJxNpRHJU= +github.com/davidbyttow/govips/v2 v2.14.0/go.mod h1:eglyvgm65eImDiJJk4wpj9LSz4pWivPzWgDqkxWJn5k= +github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/h2non/bimg v1.1.9 h1:WH20Nxko9l/HFm4kZCA3Phbgu2cbHvYzxwxn9YROEGg= +github.com/h2non/bimg v1.1.9/go.mod h1:R3+UiYwkK4rQl6KVFTOFJHitgLbZXBZNFh2cv3AEbp8= +github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= +github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY= +github.com/json5/json5-go v0.0.0-20160331055859-40c2958e3bf8 h1:BQuwfXQRDQMI8YNqINKNlFV23P0h07ZvOQAtezAEsP8= +github.com/json5/json5-go v0.0.0-20160331055859-40c2958e3bf8/go.mod h1:7n1PdYNh4RIHTvILru80IEstTADqQz/wmjeNXTcC9rA= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= +github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= 
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA= +github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw= +github.com/thoas/go-funk v0.9.3/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q= +github.com/titanous/json5 v1.0.0 h1:hJf8Su1d9NuI/ffpxgxQfxh/UiBFZX7bMPid0rIL/7s= +github.com/titanous/json5 v1.0.0/go.mod h1:7JH1M8/LHKc6cyP5o5g3CSaRj+mBrIimTxzpvmckH8c= +github.com/vmihailenco/msgpack v4.0.4+incompatible h1:dSLoQfGFAo3F6OoNhwUmLwVgaUXK79GlxNBwueZn0xI= +github.com/vmihailenco/msgpack v4.0.4+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= +golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM= +golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= +golang.org/x/image v0.10.0/go.mod 
h1:jtrku+n79PfroUbvDdeUWMAI+heR786BofxrbiSF+J0= +golang.org/x/image v0.16.0 h1:9kloLAKhUufZhA12l5fwnx2NZW39/we1UhBesW433jw= +golang.org/x/image v0.16.0/go.mod h1:ugSZItdV4nOxyqp56HmXwH0Ry0nBCpjnZdpDaIHdoPs= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools 
v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/misc/bad-go-version/meme_search.go b/misc/bad-go-version/meme_search.go new file mode 100644 index 0000000..81fc2b7 --- /dev/null +++ b/misc/bad-go-version/meme_search.go @@ -0,0 +1,877 @@ +package main + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "fmt" + "hash/fnv" + "io" + "log" + "net/http" + "os" + "path/filepath" + "runtime" + "runtime/pprof" + "strings" + "sync" + "time" + + "github.com/DataIntelligenceCrew/go-faiss" + "github.com/h2non/bimg" + "github.com/jmoiron/sqlx" + _ "github.com/mattn/go-sqlite3" + "github.com/samber/lo" + "github.com/vmihailenco/msgpack" + "github.com/x448/float16" + "golang.org/x/sync/errgroup" +) + +type Config struct { + ClipServer string `json:"clip_server"` + DbPath string `json:"db_path"` + Port int16 `json:"port"` + Files string `json:"files"` + EnableOCR bool `json:"enable_ocr"` + 
// schema is the SQLite DDL executed at startup. It creates the `files` table,
// which holds one row per image with its embedding, OCR output and thumbnail
// metadata, and an FTS5 index over the OCR text that is kept in sync with
// `files` through triggers.
//
// Fixes relative to the previous definition:
//   - the FTS table's external content table is `files`; the previously named
//     `ocr` table does not exist anywhere in this schema;
//   - the AFTER UPDATE trigger has its own name (`ocr_fts_upd`). It used to
//     reuse `ocr_fts_del`, so `IF NOT EXISTS` silently skipped it and OCR text
//     written by UPDATE never reached the index;
//   - the AFTER UPDATE trigger inserts into the `ocr` column; `text` is not a
//     column of `ocr_fts`.
//
// NOTE(review): every statement uses IF NOT EXISTS, so databases created with
// the old schema keep their old virtual table definition until it is dropped.
var schema = `
CREATE TABLE IF NOT EXISTS files (
    filename TEXT PRIMARY KEY,
    embedding_time INTEGER,
    ocr_time INTEGER,
    thumbnail_time INTEGER,
    embedding BLOB,
    ocr TEXT,
    raw_ocr_segments BLOB,
    thumbnails BLOB
);

CREATE VIRTUAL TABLE IF NOT EXISTS ocr_fts USING fts5 (
    filename,
    ocr,
    tokenize='unicode61 remove_diacritics 2',
    content='files'
);

CREATE TRIGGER IF NOT EXISTS ocr_fts_ins AFTER INSERT ON files BEGIN
    INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
END;

CREATE TRIGGER IF NOT EXISTS ocr_fts_del AFTER DELETE ON files BEGIN
    INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
END;

CREATE TRIGGER IF NOT EXISTS ocr_fts_upd AFTER UPDATE ON files BEGIN
    INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
    INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
END;
`
+} + +func queryClipServer[I interface{}, O interface{}](config Config, path string, data I) (O, error) { + var result O + b, err := msgpack.Marshal(data) + if err != nil { + return result, err + } + resp, err := http.Post(config.ClipServer+path, "application/msgpack", bytes.NewReader(b)) + if err != nil { + return result, err + } + defer resp.Body.Close() + return decodeMsgpackFrom[O](resp) +} + +type LoadedImage struct { + image *bimg.Image + filename string + originalSize int +} + +type EmbeddingInput struct { + image []byte + filename string +} + +type EmbeddingRequest struct { + Images [][]byte `msgpack:"images"` + Text []string `msgpack:"text"` +} + +type EmbeddingResponse = [][]byte + +func timestamp() int64 { + return time.Now().UnixMicro() +} + +type ImageFormatConfig struct { + targetWidth int + targetFilesize int + quality int + format bimg.ImageType + extension string +} + +func generateFilenameHash(filename string) string { + hasher := fnv.New128() + hasher.Write([]byte(filename)) + hash := hasher.Sum(make([]byte, 0)) + return base64.RawURLEncoding.EncodeToString(hash) +} + +func generateThumbnailFilename(filename string, formatName string, formatConfig ImageFormatConfig) string { + return fmt.Sprintf("%s%s.%s", generateFilenameHash(filename), formatName, formatConfig.extension) +} + +func initializeDatabase(config Config) (*sqlx.DB, error) { + db, err := sqlx.Connect("sqlite3", config.DbPath) + if err != nil { + return nil, err + } + _, err = db.Exec("PRAGMA busy_timeout = 2000; PRAGMA journal_mode = WAL") + if err != nil { + return nil, err + } + return db, nil +} + +func imageFormats(config Config) map[string]ImageFormatConfig { + return map[string]ImageFormatConfig{ + "jpegl": { + targetWidth: 800, + quality: 70, + format: bimg.JPEG, + extension: "jpg", + }, + "jpegh": { + targetWidth: 1600, + quality: 80, + format: bimg.JPEG, + extension: "jpg", + }, + "jpeg256kb": { + targetWidth: 500, + targetFilesize: 256000, + format: bimg.JPEG, + extension: 
"jpg", + }, + "avifh": { + targetWidth: 1600, + quality: 80, + format: bimg.AVIF, + extension: "avif", + }, + "avifl": { + targetWidth: 800, + quality: 30, + format: bimg.AVIF, + extension: "avif", + }, + } +} + +func ingestFiles(config Config, backend InferenceServerConfig) error { + var wg errgroup.Group + var iwg errgroup.Group + + // We assume everything is either a modern browser (low-DPI or high-DPI), an ancient browser or a ComputerCraft machine abusing Extra Utilities 2 screens. + var formats = imageFormats(config) + + db, err := initializeDatabase(config) + if err != nil { + return err + } + defer db.Close() + + toProcess := make(chan FileRecord, 100) + toEmbed := make(chan EmbeddingInput, backend.BatchSize) + toThumbnail := make(chan LoadedImage, 30) + toOCR := make(chan LoadedImage, 30) + embedBatches := make(chan []EmbeddingInput, 1) + + // image loading and preliminary resizing + for range runtime.NumCPU() { + iwg.Go(func() error { + for record := range toProcess { + path := filepath.Join(config.Files, record.Filename) + buffer, err := bimg.Read(path) + if err != nil { + log.Println("could not read ", record.Filename) + } + img := bimg.NewImage(buffer) + if record.Embedding == nil { + resized, err := img.Process(bimg.Options{ + Width: int(backend.ImageSize[0]), + Height: int(backend.ImageSize[1]), + Force: true, + Type: bimg.PNG, + Interpretation: bimg.InterpretationSRGB, + }) + if err != nil { + log.Println("resize failure", record.Filename, err) + } else { + toEmbed <- EmbeddingInput{ + image: resized, + filename: record.Filename, + } + } + } + if record.Thumbnails == nil && config.EnableThumbnails { + toThumbnail <- LoadedImage{ + image: img, + filename: record.Filename, + originalSize: len(buffer), + } + } + if record.RawOcrSegments == nil && config.EnableOCR { + toOCR <- LoadedImage{ + image: img, + filename: record.Filename, + } + } + } + return nil + }) + } + + if config.EnableThumbnails { + for range runtime.NumCPU() { + wg.Go(func() error { + 
for image := range toThumbnail { + generatedFormats := make([]string, 0) + for formatName, formatConfig := range formats { + var err error + var resized []byte + if formatConfig.targetFilesize != 0 { + lb := 1 + ub := 100 + for { + quality := (lb + ub) / 2 + resized, err = image.image.Process(bimg.Options{ + Width: formatConfig.targetWidth, + Type: formatConfig.format, + Speed: 4, + Quality: quality, + StripMetadata: true, + Enlarge: false, + }) + if len(resized) > image.originalSize { + ub = quality + } else { + lb = quality + 1 + } + if lb >= ub { + break + } + } + } else { + resized, err = image.image.Process(bimg.Options{ + Width: formatConfig.targetWidth, + Type: formatConfig.format, + Speed: 4, + Quality: formatConfig.quality, + StripMetadata: true, + Enlarge: false, + }) + } + if err != nil { + log.Println("thumbnailing failure", image.filename, err) + continue + } + if len(resized) < image.originalSize { + generatedFormats = append(generatedFormats, formatName) + err = bimg.Write(filepath.Join(config.ThumbsPath, generateThumbnailFilename(image.filename, formatName, formatConfig)), resized) + if err != nil { + return err + } + } + } + formatsData, err := msgpack.Marshal(generatedFormats) + if err != nil { + return err + } + _, err = db.Exec("UPDATE files SET thumbnails = ?, thumbnail_time = ? WHERE filename = ?", formatsData, timestamp(), image.filename) + if err != nil { + return err + } + } + return nil + }) + } + } + + if config.EnableOCR { + for range 100 { + wg.Go(func() error { + for image := range toOCR { + scan, err := scanImage(image.image) + if err != nil { + log.Println("OCR failure", image.filename, err) + continue + } + ocrText := "" + for _, segment := range scan { + ocrText += segment.text + ocrText += "\n" + } + ocrData, err := msgpack.Marshal(scan) + if err != nil { + return err + } + _, err = db.Exec("UPDATE files SET ocr = ?, raw_ocr_segments = ?, ocr_time = ? 
WHERE filename = ?", ocrText, ocrData, timestamp(), image.filename) + if err != nil { + return err + } + } + return nil + }) + } + } + + wg.Go(func() error { + buffer := make([]EmbeddingInput, 0, backend.BatchSize) + for input := range toEmbed { + buffer = append(buffer, input) + if len(buffer) == int(backend.BatchSize) { + embedBatches <- buffer + buffer = make([]EmbeddingInput, 0, backend.BatchSize) + } + } + if len(buffer) > 0 { + embedBatches <- buffer + } + close(embedBatches) + return nil + }) + + for range 3 { + wg.Go(func() error { + for batch := range embedBatches { + result, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "", EmbeddingRequest{ + Images: lo.Map(batch, func(item EmbeddingInput, _ int) []byte { return item.image }), + }) + if err != nil { + return err + } + + tx, err := db.Begin() + if err != nil { + return err + } + for i, vector := range result { + _, err = tx.Exec("UPDATE files SET embedding_time = ?, embedding = ? WHERE filename = ?", timestamp(), vector, batch[i].filename) + if err != nil { + return err + } + } + err = tx.Commit() + if err != nil { + return err + } + } + return nil + }) + } + + filenamesOnDisk := make(map[string]struct{}) + + err = filepath.WalkDir(config.Files, func(path string, d os.DirEntry, err error) error { + filename := strings.TrimPrefix(path, config.Files) + if err != nil { + return err + } + if d.IsDir() { + return nil + } + filenamesOnDisk[filename] = struct{}{} + records := []FileRecord{} + err = db.Select(&records, "SELECT * FROM files WHERE filename = ?", filename) + if err != nil { + return err + } + stat, err := d.Info() + if err != nil { + return err + } + modtime := stat.ModTime().UnixMicro() + if len(records) == 0 || modtime > records[0].EmbedTime || modtime > records[0].OcrTime || modtime > records[0].ThumbnailTime { + _, err = db.Exec("INSERT OR IGNORE INTO files VALUES (?, 0, 0, 0, '', '', '', '')", filename) + if err != nil { + return err + } + record := FileRecord{ + Filename: 
filename, + } + if len(records) > 0 { + record = records[0] + } + if modtime > record.EmbedTime || len(record.Embedding) == 0 { + record.Embedding = nil + } + if modtime > record.OcrTime || len(record.RawOcrSegments) == 0 { + record.RawOcrSegments = nil + } + if modtime > record.ThumbnailTime || len(record.Thumbnails) == 0 { + record.Thumbnails = nil + } + toProcess <- record + } + return nil + }) + if err != nil { + return err + } + close(toProcess) + + err = iwg.Wait() + close(toEmbed) + close(toThumbnail) + if err != nil { + return err + } + err = wg.Wait() + if err != nil { + return err + } + + rows, err := db.Queryx("SELECT filename FROM files") + if err != nil { + return err + } + tx, err := db.Begin() + if err != nil { + return err + } + for rows.Next() { + var filename string + err := rows.Scan(&filename) + if err != nil { + return err + } + if _, ok := filenamesOnDisk[filename]; !ok { + _, err = tx.Exec("DELETE FROM files WHERE filename = ?", filename) + if err != nil { + return err + } + } + } + if err = tx.Commit(); err != nil { + return err + } + + return nil +} + +const INDEX_ADD_BATCH = 512 + +func buildIndex(config Config, backend InferenceServerConfig) (Index, error) { + var index Index + + db, err := initializeDatabase(config) + if err != nil { + return index, err + } + defer db.Close() + + newFAISSIndex, err := faiss.IndexFactory(int(backend.EmbeddingSize), "SQfp16", faiss.MetricInnerProduct) + if err != nil { + return index, err + } + index.vectors = newFAISSIndex + + var count int + err = db.Get(&count, "SELECT COUNT(*) FROM files") + if err != nil { + return index, err + } + + index.filenames = make([]string, 0, count) + index.formatCodes = make([]int64, 0, count) + buffer := make([]float32, 0, INDEX_ADD_BATCH*backend.EmbeddingSize) + index.formatNames = make([]string, 0, 5) + + record := FileRecord{} + rows, err := db.Queryx("SELECT * FROM files") + if err != nil { + return index, err + } + for rows.Next() { + err := rows.StructScan(&record) + 
if err != nil { + return index, err + } + if len(record.Embedding) > 0 { + index.filenames = append(index.filenames, record.Filename) + for i := 0; i < len(record.Embedding); i += 2 { + buffer = append(buffer, float16.Frombits(uint16(record.Embedding[i])+uint16(record.Embedding[i+1])<<8).Float32()) + } + if len(buffer) == cap(buffer) { + index.vectors.Add(buffer) + buffer = make([]float32, 0, INDEX_ADD_BATCH*backend.EmbeddingSize) + } + + formats := make([]string, 0, 5) + if len(record.Thumbnails) > 0 { + err := msgpack.Unmarshal(record.Thumbnails, &formats) + if err != nil { + return index, err + } + } + + formatCode := int64(0) + for _, formatString := range formats { + found := false + for i, name := range index.formatNames { + if name == formatString { + formatCode |= 1 << i + found = true + break + } + } + if !found { + newIndex := len(index.formatNames) + formatCode |= 1 << newIndex + index.formatNames = append(index.formatNames, formatString) + } + } + index.formatCodes = append(index.formatCodes, formatCode) + } + } + if len(buffer) > 0 { + index.vectors.Add(buffer) + } + + return index, nil +} + +func decodeFP16Buffer(buf []byte) []float32 { + out := make([]float32, 0, len(buf)/2) + for i := 0; i < len(buf); i += 2 { + out = append(out, float16.Frombits(uint16(buf[i])+uint16(buf[i+1])<<8).Float32()) + } + return out +} + +type EmbeddingVector []float32 + +type QueryResult struct { + Matches [][]interface{} `json:"matches"` + Formats []string `json:"formats"` + Extensions map[string]string `json:"extensions"` +} + +// this terrible language cannot express tagged unions +type QueryTerm struct { + Embedding *EmbeddingVector `json:"embedding"` + Image *string `json:"image"` // base64 + Text *string `json:"text"` + Weight *float32 `json:"weight"` +} + +type QueryRequest struct { + Terms []QueryTerm `json:"terms"` + K *int `json:"k"` +} + +func queryIndex(index *Index, query EmbeddingVector, k int) (QueryResult, error) { + var qr QueryResult + distances, ids, 
err := index.vectors.Search(query, int64(k)) + if err != nil { + return qr, err + } + items := lo.Map(lo.Zip2(distances, ids), func(x lo.Tuple2[float32, int64], i int) []interface{} { + return []interface{}{ + x.A, + index.filenames[x.B], + generateFilenameHash(index.filenames[x.B]), + index.formatCodes[x.B], + } + }) + + return QueryResult{ + Matches: items, + Formats: index.formatNames, + }, nil +} + +func handleRequest(config Config, backendConfig InferenceServerConfig, index *Index, w http.ResponseWriter, req *http.Request) error { + if req.Body == nil { + io.WriteString(w, "OK") // health check + return nil + } + dec := json.NewDecoder(req.Body) + var qreq QueryRequest + err := dec.Decode(&qreq) + if err != nil { + return err + } + + totalEmbedding := make(EmbeddingVector, backendConfig.EmbeddingSize) + + imageBatch := make([][]byte, 0) + imageWeights := make([]float32, 0) + textBatch := make([]string, 0) + textWeights := make([]float32, 0) + + for _, term := range qreq.Terms { + if term.Image != nil { + bytes, err := base64.StdEncoding.DecodeString(*term.Image) + if err != nil { + return err + } + loaded := bimg.NewImage(bytes) + resized, err := loaded.Process(bimg.Options{ + Width: int(backendConfig.ImageSize[0]), + Height: int(backendConfig.ImageSize[1]), + Force: true, + Type: bimg.PNG, + Interpretation: bimg.InterpretationSRGB, + }) + if err != nil { + return err + } + imageBatch = append(imageBatch, resized) + if term.Weight != nil { + imageWeights = append(imageWeights, *term.Weight) + } else { + imageWeights = append(imageWeights, 1) + } + } + if term.Text != nil { + textBatch = append(textBatch, *term.Text) + if term.Weight != nil { + textWeights = append(textWeights, *term.Weight) + } else { + textWeights = append(textWeights, 1) + } + } + if term.Embedding != nil { + weight := float32(1.0) + if term.Weight != nil { + weight = *term.Weight + } + for i := 0; i < int(backendConfig.EmbeddingSize); i += 1 { + totalEmbedding[i] += (*term.Embedding)[i] * 
weight + } + } + } + + if len(imageBatch) > 0 { + embs, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "/", EmbeddingRequest{ + Images: imageBatch, + }) + if err != nil { + return err + } + for j, emb := range embs { + embd := decodeFP16Buffer(emb) + for i := 0; i < int(backendConfig.EmbeddingSize); i += 1 { + totalEmbedding[i] += embd[i] * imageWeights[j] + } + } + } + if len(textBatch) > 0 { + embs, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "/", EmbeddingRequest{ + Text: textBatch, + }) + if err != nil { + return err + } + for j, emb := range embs { + embd := decodeFP16Buffer(emb) + for i := 0; i < int(backendConfig.EmbeddingSize); i += 1 { + totalEmbedding[i] += embd[i] * textWeights[j] + } + } + } + + k := 1000 + if qreq.K != nil { + k = *qreq.K + } + + w.Header().Add("Content-Type", "application/json") + enc := json.NewEncoder(w) + + qres, err := queryIndex(index, totalEmbedding, k) + + qres.Extensions = make(map[string]string) + for k, v := range imageFormats(config) { + qres.Extensions[k] = v.extension + } + + if err != nil { + return err + } + + err = enc.Encode(qres) + if err != nil { + return err + } + return nil +} + +func init() { + os.Setenv("VIPS_WARNING", "FALSE") // this does not actually work + bimg.VipsCacheSetMax(0) + bimg.VipsCacheSetMaxMem(0) +} + +func main() { + content, err := os.ReadFile(os.Args[1]) + if err != nil { + log.Fatal("config file unreadable ", err) + } + var config Config + err = json.Unmarshal(content, &config) + if err != nil { + log.Fatal("config file wrong ", err) + } + fmt.Println(config) + + db, err := sqlx.Connect("sqlite3", config.DbPath) + if err != nil { + log.Fatal("DB connection failure ", db) + } + db.MustExec(schema) + + var backend InferenceServerConfig + for { + resp, err := http.Get(config.ClipServer + "/config") + if err != nil { + log.Println("backend failed (fetch) ", err) + } + backend, err = decodeMsgpackFrom[InferenceServerConfig](resp) + resp.Body.Close() + 
if err != nil { + log.Println("backend failed (parse) ", err) + } else { + break + } + time.Sleep(time.Second) + } + + requestIngest := make(chan struct{}, 1) + + var index *Index + // maybe this ought to be mutexed? + var lastError *error + // there's not a neat way to reusably broadcast to multiple channels, but I *can* abuse WaitGroups probably + // this might cause horrible concurrency issues, but you brought me to this point, Go designers + var wg sync.WaitGroup + + go func() { + for { + wg.Add(1) + log.Println("ingest running") + err := ingestFiles(config, backend) + if err != nil { + log.Println("ingest failed ", err) + lastError = &err + } else { + newIndex, err := buildIndex(config, backend) + if err != nil { + log.Println("index build failed ", err) + lastError = &err + } else { + lastError = nil + index = &newIndex + } + } + wg.Done() + <-requestIngest + } + }() + newIndex, err := buildIndex(config, backend) + index = &newIndex + if err != nil { + log.Fatal("index build failed ", err) + } + + http.HandleFunc("/", func(w http.ResponseWriter, req *http.Request) { + w.Header().Add("Access-Control-Allow-Origin", "*") + w.Header().Add("Access-Control-Allow-Headers", "Content-Type") + if req.Method == "OPTIONS" { + w.WriteHeader(204) + return + } + err := handleRequest(config, backend, index, w, req) + if err != nil { + w.Header().Add("Content-Type", "application/json") + w.WriteHeader(500) + json.NewEncoder(w).Encode(map[string]string{ + "error": err.Error(), + }) + } + }) + http.HandleFunc("/reload", func(w http.ResponseWriter, req *http.Request) { + if req.Method == "POST" { + log.Println("requesting index reload") + select { + case requestIngest <- struct{}{}: + default: + } + wg.Wait() + if lastError == nil { + w.Write([]byte("OK")) + } else { + w.WriteHeader(500) + w.Write([]byte((*lastError).Error())) + } + } + }) + http.HandleFunc("/profile", func(w http.ResponseWriter, req *http.Request) { + f, err := os.Create("mem.pprof") + if err != nil { + 
log.Fatal("could not create memory profile: ", err) + } + defer f.Close() + var m runtime.MemStats + runtime.ReadMemStats(&m) + log.Printf("Memory usage: Alloc=%v, TotalAlloc=%v, Sys=%v", m.Alloc, m.TotalAlloc, m.Sys) + log.Println(bimg.VipsMemory()) + bimg.VipsDebugInfo() + runtime.GC() // Trigger garbage collection + if err := pprof.WriteHeapProfile(f); err != nil { + log.Fatal("could not write memory profile: ", err) + } + }) + log.Println("starting server") + http.ListenAndServe(fmt.Sprintf(":%d", config.Port), nil) +} diff --git a/misc/bad-go-version/ocr.go b/misc/bad-go-version/ocr.go new file mode 100644 index 0000000..55ca675 --- /dev/null +++ b/misc/bad-go-version/ocr.go @@ -0,0 +1,264 @@ +package main + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "mime/multipart" + "net/http" + "net/textproto" + "regexp" + "strings" + "time" + + "github.com/h2non/bimg" + "github.com/samber/lo" + "github.com/titanous/json5" +) + +const CALLBACK_REGEX string = ">AF_initDataCallback\\(({key: 'ds:1'.*?)\\);" + +type SegmentCoords struct { + x int + y int + w int + h int +} + +type Segment struct { + coords SegmentCoords + text string +} + +type ScanResult []Segment + +// TODO coordinates are negative sometimes and I think they shouldn't be +func rationalizeCoordsFormat1(imageW float64, imageH float64, centerXFraction float64, centerYFraction float64, widthFraction float64, heightFraction float64) SegmentCoords { + return SegmentCoords{ + x: int(math.Round((centerXFraction - widthFraction/2) * imageW)), + y: int(math.Round((centerYFraction - heightFraction/2) * imageH)), + w: int(math.Round(widthFraction * imageW)), + h: int(math.Round(heightFraction * imageH)), + } +} + +func scanImageChunk(image []byte, imageWidth int, imageHeight int) (ScanResult, error) { + var result ScanResult + timestamp := time.Now().UnixMicro() + var b bytes.Buffer + w := multipart.NewWriter(&b) + defer w.Close() + h := make(textproto.MIMEHeader) + h.Set("Content-Disposition", 
fmt.Sprintf(`form-data; name="encoded_image"; filename="ocr%d.png"`, timestamp)) + h.Set("Content-Type", "image/png") + fw, err := w.CreatePart(h) + if err != nil { + return result, err + } + fw.Write(image) + w.Close() + + req, err := http.NewRequest("POST", fmt.Sprintf("https://lens.google.com/v3/upload?stcs=%d", timestamp), &b) + if err != nil { + return result, err + } + req.Header.Add("User-Agent", "Mozilla/5.0 (Linux; Android 13; RMX3771) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.144 Mobile Safari/537.36") + req.AddCookie(&http.Cookie{ + Name: "SOCS", + Value: "CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg", + }) + req.Header.Set("Content-Type", w.FormDataContentType()) + client := http.Client{} + res, err := client.Do(req) + if err != nil { + return result, err + } + defer res.Body.Close() + body, err := io.ReadAll(res.Body) + if err != nil { + return result, err + } + re, _ := regexp.Compile(CALLBACK_REGEX) + matches := re.FindStringSubmatch(string(body[:])) + if len(matches) == 0 { + return result, fmt.Errorf("invalid API response") + } + match := matches[1] + var lensObject map[string]interface{} + err = json5.Unmarshal([]byte(match), &lensObject) + if err != nil { + return result, err + } + + if _, ok := lensObject["errorHasStatus"]; ok { + return result, errors.New("lens failed") + } + + root := lensObject["data"].([]interface{}) + + var textSegments []string + var textRegions []SegmentCoords + + // I don't know why Google did this. + // Text segments are in one place and their locations are in another, using a very strange coordinate system. + // At least I don't need whatever is contained in the base64 parts (which I assume are protobufs). + // TODO: on a few images, this seems to not work for some reason. + defer func() { + if r := recover(); r != nil { + // https://github.com/dimdenGD/chrome-lens-ocr/blob/main/src/core.js#L316 has code for a fallback text segment read mode. 
+ // In testing, this proved unnecessary (quirks of the HTTP request? I don't know), and this only happens on textless images. + textSegments = []string{} + textRegions = []SegmentCoords{} + } + }() + + textSegmentsRaw := root[3].([]interface{})[4].([]interface{})[0].([]interface{})[0].([]interface{}) + textRegionsRaw := root[2].([]interface{})[3].([]interface{})[0].([]interface{}) + for _, x := range textRegionsRaw { + if strings.HasPrefix(x.([]interface{})[11].(string), "text:") { + rawCoords := x.([]interface{})[1].([]interface{}) + coords := rationalizeCoordsFormat1(float64(imageWidth), float64(imageHeight), rawCoords[0].(float64), rawCoords[1].(float64), rawCoords[2].(float64), rawCoords[3].(float64)) + textRegions = append(textRegions, coords) + } + } + for _, x := range textSegmentsRaw { + textSegment := x.(string) + textSegments = append(textSegments, textSegment) + } + + return lo.Map(lo.Zip2(textSegments, textRegions), func(x lo.Tuple2[string, SegmentCoords], _ int) Segment { + return Segment{ + text: x.A, + coords: x.B, + } + }), nil +} + +const MAX_DIM int = 1024 + +func scanImage(image *bimg.Image) (ScanResult, error) { + result := ScanResult{} + metadata, err := image.Metadata() + if err != nil { + return result, err + } + width := metadata.Size.Width + height := metadata.Size.Height + if width > MAX_DIM { + width = MAX_DIM + height = int(math.Round(float64(height) * (float64(width) / float64(metadata.Size.Width)))) + } + for y := 0; y < height; y += MAX_DIM { + chunkHeight := MAX_DIM + if y+chunkHeight > height { + chunkHeight = height - y + } + chunk, err := image.Process(bimg.Options{ + Height: height, // these are for overall image dimensions (resize then crop) + Width: width, + Top: y, + AreaHeight: chunkHeight, + AreaWidth: width, + Crop: true, + Type: bimg.PNG, + }) + if err != nil { + return result, err + } + res, err := scanImageChunk(chunk, width, chunkHeight) + if err != nil { + return result, err + } + for _, segment := range res { + 
result = append(result, Segment{ + text: segment.text, + coords: SegmentCoords{ + y: segment.coords.y + y, + x: segment.coords.x, + w: segment.coords.w, + h: segment.coords.h, + }, + }) + } + } + + return result, nil +} + +/* +async def scan_image_chunk(sess, image): + # send data to inscrutable undocumented Google service + # https://github.com/AuroraWright/owocr/blob/master/owocr/ocr.py#L193 + async with aiohttp.ClientSession() as sess: + data = aiohttp.FormData() + data.add_field( + "encoded_image", + encode_img(image), + filename="ocr" + str(timestamp) + ".png", + content_type="image/png" + ) + async with sess.post(url, headers=headers, cookies=cookies, data=data, timeout=10) as res: + body = await res.text() + + # I really worry about Google sometimes. This is not a sensible format. + match = CALLBACK_REGEX.search(body) + if match == None: + raise ValueError("Invalid callback") + + lens_object = pyjson5.loads(match.group(1)) + if "errorHasStatus" in lens_object: + raise RuntimeError("Lens failed") + + text_segments = [] + text_regions = [] + + root = lens_object["data"] + + # I don't know why Google did this. + # Text segments are in one place and their locations are in another, using a very strange coordinate system. + # At least I don't need whatever is contained in the base64 partss (which I assume are protobufs). + # TODO: on a few images, this seems to not work for some reason. + try: + text_segments = root[3][4][0][0] + text_regions = [ rationalize_coords_format1(image.width, image.height, *x[1]) for x in root[2][3][0] if x[11].startswith("text:") ] + except (KeyError, IndexError): + # https://github.com/dimdenGD/chrome-lens-ocr/blob/main/src/core.js#L316 has code for a fallback text segment read mode. + # In testing, this proved unnecessary (quirks of the HTTP request? I don't know), and this only happens on textless images. 
+ return [], [] + + return text_segments, text_regions + +MAX_SCAN_DIM = 1000 # not actually true but close enough +def chunk_image(image: Image): + chunks = [] + # Cut image down in X axis (I'm assuming images aren't too wide to scan in downscaled form because merging text horizontally would be annoying) + if image.width > MAX_SCAN_DIM: + image = image.resize((MAX_SCAN_DIM, round(image.height * (image.width / MAX_SCAN_DIM))), Image.LANCZOS) + for y in range(0, image.height, MAX_SCAN_DIM): + chunks.append(image.crop((0, y, image.width, min(y + MAX_SCAN_DIM, image.height)))) + return chunks + +async def scan_chunks(sess: aiohttp.ClientSession, chunks: [Image]): + # If text happens to be split across the cut line it won't get read. + # This is because doing overlap read areas would be really annoying. + text = "" + regions = [] + for chunk in chunks: + new_segments, new_regions = await scan_image_chunk(sess, chunk) + for segment in new_segments: + text += segment + "\n" + for i, (segment, region) in enumerate(zip(new_segments, new_regions)): + regions.append({ **region, "y": region["y"] + (MAX_SCAN_DIM * i), "text": segment }) + return text, regions + +async def scan_image(sess: aiohttp.ClientSession, image: Image): + return await scan_chunks(sess, chunk_image(image)) + +if __name__ == "__main__": + async def main(): + async with aiohttp.ClientSession() as sess: + print(await scan_image(sess, Image.open("/data/public/memes-or-something/linear-algebra-chess.png"))) + asyncio.run(main()) +*/ diff --git a/misc/bad-go-version/problematic_thing.go b/misc/bad-go-version/problematic_thing.go new file mode 100644 index 0000000..487be9c --- /dev/null +++ b/misc/bad-go-version/problematic_thing.go @@ -0,0 +1,891 @@ +package main + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "fmt" + "hash/fnv" + "io" + "log" + "net/http" + "os" + "path/filepath" + "runtime" + "runtime/pprof" + "strings" + "sync" + "time" + + "github.com/DataIntelligenceCrew/go-faiss" + 
"github.com/davidbyttow/govips/v2/vips" + "github.com/h2non/bimg" + "github.com/jmoiron/sqlx" + _ "github.com/mattn/go-sqlite3" + "github.com/samber/lo" + "github.com/vmihailenco/msgpack" + "github.com/x448/float16" + "golang.org/x/sync/errgroup" +) + +type Config struct { + ClipServer string `json:"clip_server"` + DbPath string `json:"db_path"` + Port int16 `json:"port"` + Files string `json:"files"` + EnableOCR bool `json:"enable_ocr"` + ThumbsPath string `json:"thumbs_path"` + EnableThumbnails bool `json:"enable_thumbs"` +} + +type Index struct { + vectors *faiss.IndexImpl + filenames []string + formatCodes []int64 + formatNames []string +} + +var schema = ` +CREATE TABLE IF NOT EXISTS files ( + filename TEXT PRIMARY KEY, + embedding_time INTEGER, + ocr_time INTEGER, + thumbnail_time INTEGER, + embedding BLOB, + ocr TEXT, + raw_ocr_segments BLOB, + thumbnails BLOB +); + +CREATE VIRTUAL TABLE IF NOT EXISTS ocr_fts USING fts5 ( + filename, + ocr, + tokenize='unicode61 remove_diacritics 2', + content='ocr' +); + +CREATE TRIGGER IF NOT EXISTS ocr_fts_ins AFTER INSERT ON files BEGIN + INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, '')); +END; + +CREATE TRIGGER IF NOT EXISTS ocr_fts_del AFTER DELETE ON files BEGIN + INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, '')); +END; + +CREATE TRIGGER IF NOT EXISTS ocr_fts_del AFTER UPDATE ON files BEGIN + INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, '')); + INSERT INTO ocr_fts (rowid, filename, text) VALUES (new.rowid, new.filename, COALESCE(new.ocr, '')); +END; +` + +type FileRecord struct { + Filename string `db:"filename"` + EmbedTime int64 `db:"embedding_time"` + OcrTime int64 `db:"ocr_time"` + ThumbnailTime int64 `db:"thumbnail_time"` + Embedding []byte `db:"embedding"` + Ocr string `db:"ocr"` + RawOcrSegments []byte `db:"raw_ocr_segments"` + 
// generateFilenameHash returns the 128-bit FNV-1 hash of filename, encoded as unpadded
// URL-safe base64 (22 characters). It is used as the stem of thumbnail file names.
func generateFilenameHash(filename string) string {
	digest := fnv.New128()
	_, _ = digest.Write([]byte(filename)) // hash.Hash.Write never returns an error
	return base64.RawURLEncoding.EncodeToString(digest.Sum(nil))
}
journal_mode = WAL") + if err != nil { + return nil, err + } + return db, nil +} + +func imageFormats(config Config) map[string]ImageFormatConfig { + return map[string]ImageFormatConfig{ + "jpegl": { + targetWidth: 800, + quality: 70, + format: vips.ImageTypeJPEG, + extension: "jpg", + }, + "jpegh": { + targetWidth: 1600, + quality: 80, + format: vips.ImageTypeJPEG, + extension: "jpg", + }, + "jpeg256kb": { + targetWidth: 500, + targetFilesize: 256000, + format: vips.ImageTypeJPEG, + extension: "jpg", + }, + "avifh": { + targetWidth: 1600, + quality: 80, + format: vips.ImageTypeAVIF, + extension: "avif", + }, + "avifl": { + targetWidth: 800, + quality: 30, + format: vips.ImageTypeAVIF, + extension: "avif", + }, + } +} + +func ingestFiles(config Config, backend InferenceServerConfig) error { + var wg errgroup.Group + var iwg errgroup.Group + + // We assume everything is either a modern browser (low-DPI or high-DPI), an ancient browser or a ComputerCraft machine abusing Extra Utilities 2 screens. 
+ var formats = imageFormats(config) + + db, err := initializeDatabase(config) + if err != nil { + return err + } + defer db.Close() + + toProcess := make(chan FileRecord, 100) + toEmbed := make(chan EmbeddingInput, backend.BatchSize) + toThumbnail := make(chan LoadedImage, 30) + toOCR := make(chan LoadedImage, 30) + embedBatches := make(chan []EmbeddingInput, 1) + + // image loading and preliminary resizing + for range runtime.NumCPU() { + iwg.Go(func() error { + for record := range toProcess { + path := filepath.Join(config.Files, record.Filename) + img, err := vips.LoadImageFromFile(path, &vips.ImportParams{}) + if err != nil { + log.Println("could not read", record.Filename) + continue + } + if record.Embedding == nil { + i, err := img.Copy() // TODO this is ugly, we should not need to do in-place operations + if err != nil { + return err + } + err = i.ResizeWithVScale(float64(backend.ImageSize[0])/float64(i.Width()), float64(backend.ImageSize[1])/float64(i.Height()), vips.KernelLanczos3) + if err != nil { + return err + } + resized, _, err := i.ExportPng(vips.NewPngExportParams()) + if err != nil { + log.Println("resize failure", record.Filename, err) + } else { + toEmbed <- EmbeddingInput{ + image: resized, + filename: record.Filename, + } + } + } + if record.Thumbnails == nil && config.EnableThumbnails { + toThumbnail <- LoadedImage{ + image: img, + filename: record.Filename, + originalSize: int(record.filesize), + } + } + if record.RawOcrSegments == nil && config.EnableOCR { + toOCR <- LoadedImage{ + image: img, + filename: record.Filename, + } + } + } + return nil + }) + } + + if config.EnableThumbnails { + for range runtime.NumCPU() { + wg.Go(func() error { + for image := range toThumbnail { + generatedFormats := make([]string, 0) + for formatName, formatConfig := range formats { + var err error + var resized []byte + if formatConfig.targetFilesize != 0 { + lb := 1 + ub := 100 + for { + quality := (lb + ub) / 2 + i, err := image.image.Copy() + if err != 
nil { + return err + } + i.Resize(float64(formatConfig.targetWidth)/float64(i.Width()), vips.KernelLanczos3) + resized, _, err = i.Export(&vips.ExportParams{ + Format: formatConfig.format, + Speed: 4, + Quality: quality, + StripMetadata: true, + }) + if len(resized) > image.originalSize { + ub = quality + } else { + lb = quality + 1 + } + if lb >= ub { + break + } + } + } else { + i, err := image.image.Copy() + if err != nil { + return err + } + i.Resize(float64(formatConfig.targetWidth)/float64(i.Width()), vips.KernelLanczos3) + resized, _, err = i.Export(&vips.ExportParams{ + Format: formatConfig.format, + Speed: 4, + Quality: formatConfig.quality, + StripMetadata: true, + }) + } + if err != nil { + log.Println("thumbnailing failure", image.filename, err) + continue + } + if len(resized) < image.originalSize { + generatedFormats = append(generatedFormats, formatName) + err = bimg.Write(filepath.Join(config.ThumbsPath, generateThumbnailFilename(image.filename, formatName, formatConfig)), resized) + if err != nil { + return err + } + } + } + formatsData, err := msgpack.Marshal(generatedFormats) + if err != nil { + return err + } + _, err = db.Exec("UPDATE files SET thumbnails = ?, thumbnail_time = ? WHERE filename = ?", formatsData, timestamp(), image.filename) + if err != nil { + return err + } + } + return nil + }) + } + } + + if config.EnableOCR { + for range 100 { + wg.Go(func() error { + for image := range toOCR { + scan, err := scanImage(image.image) + if err != nil { + log.Println("OCR failure", image.filename, err) + continue + } + ocrText := "" + for _, segment := range scan { + ocrText += segment.text + ocrText += "\n" + } + ocrData, err := msgpack.Marshal(scan) + if err != nil { + return err + } + _, err = db.Exec("UPDATE files SET ocr = ?, raw_ocr_segments = ?, ocr_time = ? 
WHERE filename = ?", ocrText, ocrData, timestamp(), image.filename) + if err != nil { + return err + } + } + return nil + }) + } + } + + wg.Go(func() error { + buffer := make([]EmbeddingInput, 0, backend.BatchSize) + for input := range toEmbed { + buffer = append(buffer, input) + if len(buffer) == int(backend.BatchSize) { + embedBatches <- buffer + buffer = make([]EmbeddingInput, 0, backend.BatchSize) + } + } + if len(buffer) > 0 { + embedBatches <- buffer + } + close(embedBatches) + return nil + }) + + for range 3 { + wg.Go(func() error { + for batch := range embedBatches { + result, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "", EmbeddingRequest{ + Images: lo.Map(batch, func(item EmbeddingInput, _ int) []byte { return item.image }), + }) + if err != nil { + return err + } + + tx, err := db.Begin() + if err != nil { + return err + } + for i, vector := range result { + _, err = tx.Exec("UPDATE files SET embedding_time = ?, embedding = ? WHERE filename = ?", timestamp(), vector, batch[i].filename) + if err != nil { + return err + } + } + err = tx.Commit() + if err != nil { + return err + } + } + return nil + }) + } + + filenamesOnDisk := make(map[string]struct{}) + + err = filepath.WalkDir(config.Files, func(path string, d os.DirEntry, err error) error { + filename := strings.TrimPrefix(path, config.Files) + if err != nil { + return err + } + if d.IsDir() { + return nil + } + filenamesOnDisk[filename] = struct{}{} + records := []FileRecord{} + err = db.Select(&records, "SELECT * FROM files WHERE filename = ?", filename) + if err != nil { + return err + } + stat, err := d.Info() + if err != nil { + return err + } + modtime := stat.ModTime().UnixMicro() + if len(records) == 0 || modtime > records[0].EmbedTime || modtime > records[0].OcrTime || modtime > records[0].ThumbnailTime { + _, err = db.Exec("INSERT OR IGNORE INTO files VALUES (?, 0, 0, 0, '', '', '', '')", filename) + if err != nil { + return err + } + record := FileRecord{ + Filename: 
filename, + filesize: stat.Size(), + } + if len(records) > 0 { + record = records[0] + } + if modtime > record.EmbedTime || len(record.Embedding) == 0 { + record.Embedding = nil + } + if modtime > record.OcrTime || len(record.RawOcrSegments) == 0 { + record.RawOcrSegments = nil + } + if modtime > record.ThumbnailTime || len(record.Thumbnails) == 0 { + record.Thumbnails = nil + } + toProcess <- record + } + return nil + }) + if err != nil { + return err + } + close(toProcess) + + err = iwg.Wait() + close(toEmbed) + close(toThumbnail) + if err != nil { + return err + } + err = wg.Wait() + if err != nil { + return err + } + + rows, err := db.Queryx("SELECT filename FROM files") + if err != nil { + return err + } + tx, err := db.Begin() + if err != nil { + return err + } + for rows.Next() { + var filename string + err := rows.Scan(&filename) + if err != nil { + return err + } + if _, ok := filenamesOnDisk[filename]; !ok { + _, err = tx.Exec("DELETE FROM files WHERE filename = ?", filename) + if err != nil { + return err + } + } + } + if err = tx.Commit(); err != nil { + return err + } + + return nil +} + +const INDEX_ADD_BATCH = 512 + +func buildIndex(config Config, backend InferenceServerConfig) (Index, error) { + var index Index + + db, err := initializeDatabase(config) + if err != nil { + return index, err + } + defer db.Close() + + newFAISSIndex, err := faiss.IndexFactory(int(backend.EmbeddingSize), "SQfp16", faiss.MetricInnerProduct) + if err != nil { + return index, err + } + index.vectors = newFAISSIndex + + var count int + err = db.Get(&count, "SELECT COUNT(*) FROM files") + if err != nil { + return index, err + } + + index.filenames = make([]string, 0, count) + index.formatCodes = make([]int64, 0, count) + buffer := make([]float32, 0, INDEX_ADD_BATCH*backend.EmbeddingSize) + index.formatNames = make([]string, 0, 5) + + record := FileRecord{} + rows, err := db.Queryx("SELECT * FROM files") + if err != nil { + return index, err + } + for rows.Next() { + err := 
rows.StructScan(&record) + if err != nil { + return index, err + } + if len(record.Embedding) > 0 { + index.filenames = append(index.filenames, record.Filename) + for i := 0; i < len(record.Embedding); i += 2 { + buffer = append(buffer, float16.Frombits(uint16(record.Embedding[i])+uint16(record.Embedding[i+1])<<8).Float32()) + } + if len(buffer) == cap(buffer) { + index.vectors.Add(buffer) + buffer = make([]float32, 0, INDEX_ADD_BATCH*backend.EmbeddingSize) + } + + formats := make([]string, 0, 5) + if len(record.Thumbnails) > 0 { + err := msgpack.Unmarshal(record.Thumbnails, &formats) + if err != nil { + return index, err + } + } + + formatCode := int64(0) + for _, formatString := range formats { + found := false + for i, name := range index.formatNames { + if name == formatString { + formatCode |= 1 << i + found = true + break + } + } + if !found { + newIndex := len(index.formatNames) + formatCode |= 1 << newIndex + index.formatNames = append(index.formatNames, formatString) + } + } + index.formatCodes = append(index.formatCodes, formatCode) + } + } + if len(buffer) > 0 { + index.vectors.Add(buffer) + } + + return index, nil +} + +func decodeFP16Buffer(buf []byte) []float32 { + out := make([]float32, 0, len(buf)/2) + for i := 0; i < len(buf); i += 2 { + out = append(out, float16.Frombits(uint16(buf[i])+uint16(buf[i+1])<<8).Float32()) + } + return out +} + +type EmbeddingVector []float32 + +type QueryResult struct { + Matches [][]interface{} `json:"matches"` + Formats []string `json:"formats"` + Extensions map[string]string `json:"extensions"` +} + +// this terrible language cannot express tagged unions +type QueryTerm struct { + Embedding *EmbeddingVector `json:"embedding"` + Image *string `json:"image"` // base64 + Text *string `json:"text"` + Weight *float32 `json:"weight"` +} + +type QueryRequest struct { + Terms []QueryTerm `json:"terms"` + K *int `json:"k"` +} + +func queryIndex(index *Index, query EmbeddingVector, k int) (QueryResult, error) { + var qr 
QueryResult + distances, ids, err := index.vectors.Search(query, int64(k)) + if err != nil { + return qr, err + } + items := lo.Map(lo.Zip2(distances, ids), func(x lo.Tuple2[float32, int64], i int) []interface{} { + return []interface{}{ + x.A, + index.filenames[x.B], + generateFilenameHash(index.filenames[x.B]), + index.formatCodes[x.B], + } + }) + + return QueryResult{ + Matches: items, + Formats: index.formatNames, + }, nil +} + +func handleRequest(config Config, backendConfig InferenceServerConfig, index *Index, w http.ResponseWriter, req *http.Request) error { + if req.Body == nil { + io.WriteString(w, "OK") // health check + return nil + } + dec := json.NewDecoder(req.Body) + var qreq QueryRequest + err := dec.Decode(&qreq) + if err != nil { + return err + } + + totalEmbedding := make(EmbeddingVector, backendConfig.EmbeddingSize) + + imageBatch := make([][]byte, 0) + imageWeights := make([]float32, 0) + textBatch := make([]string, 0) + textWeights := make([]float32, 0) + + for _, term := range qreq.Terms { + if term.Image != nil { + bytes, err := base64.StdEncoding.DecodeString(*term.Image) + if err != nil { + return err + } + loaded := bimg.NewImage(bytes) + resized, err := loaded.Process(bimg.Options{ + Width: int(backendConfig.ImageSize[0]), + Height: int(backendConfig.ImageSize[1]), + Force: true, + Type: bimg.PNG, + Interpretation: bimg.InterpretationSRGB, + }) + if err != nil { + return err + } + imageBatch = append(imageBatch, resized) + if term.Weight != nil { + imageWeights = append(imageWeights, *term.Weight) + } else { + imageWeights = append(imageWeights, 1) + } + } + if term.Text != nil { + textBatch = append(textBatch, *term.Text) + if term.Weight != nil { + textWeights = append(textWeights, *term.Weight) + } else { + textWeights = append(textWeights, 1) + } + } + if term.Embedding != nil { + weight := float32(1.0) + if term.Weight != nil { + weight = *term.Weight + } + for i := 0; i < int(backendConfig.EmbeddingSize); i += 1 { + 
totalEmbedding[i] += (*term.Embedding)[i] * weight + } + } + } + + if len(imageBatch) > 0 { + embs, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "/", EmbeddingRequest{ + Images: imageBatch, + }) + if err != nil { + return err + } + for j, emb := range embs { + embd := decodeFP16Buffer(emb) + for i := 0; i < int(backendConfig.EmbeddingSize); i += 1 { + totalEmbedding[i] += embd[i] * imageWeights[j] + } + } + } + if len(textBatch) > 0 { + embs, err := queryClipServer[EmbeddingRequest, EmbeddingResponse](config, "/", EmbeddingRequest{ + Text: textBatch, + }) + if err != nil { + return err + } + for j, emb := range embs { + embd := decodeFP16Buffer(emb) + for i := 0; i < int(backendConfig.EmbeddingSize); i += 1 { + totalEmbedding[i] += embd[i] * textWeights[j] + } + } + } + + k := 1000 + if qreq.K != nil { + k = *qreq.K + } + + w.Header().Add("Content-Type", "application/json") + enc := json.NewEncoder(w) + + qres, err := queryIndex(index, totalEmbedding, k) + + qres.Extensions = make(map[string]string) + for k, v := range imageFormats(config) { + qres.Extensions[k] = v.extension + } + + if err != nil { + return err + } + + err = enc.Encode(qres) + if err != nil { + return err + } + return nil +} + +func init() { + os.Setenv("VIPS_WARNING", "FALSE") // this does not actually work + bimg.VipsCacheSetMax(0) + bimg.VipsCacheSetMaxMem(0) +} + +func main() { + vips.Startup(&vips.Config{}) + defer vips.Shutdown() + + content, err := os.ReadFile(os.Args[1]) + if err != nil { + log.Fatal("config file unreadable ", err) + } + var config Config + err = json.Unmarshal(content, &config) + if err != nil { + log.Fatal("config file wrong ", err) + } + fmt.Println(config) + + db, err := sqlx.Connect("sqlite3", config.DbPath) + if err != nil { + log.Fatal("DB connection failure ", db) + } + db.MustExec(schema) + + var backend InferenceServerConfig + for { + resp, err := http.Get(config.ClipServer + "/config") + if err != nil { + log.Println("backend failed 
(fetch) ", err) + } + backend, err = decodeMsgpackFrom[InferenceServerConfig](resp) + resp.Body.Close() + if err != nil { + log.Println("backend failed (parse) ", err) + } else { + break + } + time.Sleep(time.Second) + } + + requestIngest := make(chan struct{}, 1) + + var index *Index + // maybe this ought to be mutexed? + var lastError *error + // there's not a neat way to reusably broadcast to multiple channels, but I *can* abuse WaitGroups probably + // this might cause horrible concurrency issues, but you brought me to this point, Go designers + var wg sync.WaitGroup + + go func() { + for { + wg.Add(1) + log.Println("ingest running") + err := ingestFiles(config, backend) + if err != nil { + log.Println("ingest failed ", err) + lastError = &err + } else { + newIndex, err := buildIndex(config, backend) + if err != nil { + log.Println("index build failed ", err) + lastError = &err + } else { + lastError = nil + index = &newIndex + } + } + wg.Done() + <-requestIngest + } + }() + newIndex, err := buildIndex(config, backend) + index = &newIndex + if err != nil { + log.Fatal("index build failed ", err) + } + + http.HandleFunc("/", func(w http.ResponseWriter, req *http.Request) { + w.Header().Add("Access-Control-Allow-Origin", "*") + w.Header().Add("Access-Control-Allow-Headers", "Content-Type") + if req.Method == "OPTIONS" { + w.WriteHeader(204) + return + } + err := handleRequest(config, backend, index, w, req) + if err != nil { + w.Header().Add("Content-Type", "application/json") + w.WriteHeader(500) + json.NewEncoder(w).Encode(map[string]string{ + "error": err.Error(), + }) + } + }) + http.HandleFunc("/reload", func(w http.ResponseWriter, req *http.Request) { + if req.Method == "POST" { + log.Println("requesting index reload") + select { + case requestIngest <- struct{}{}: + default: + } + wg.Wait() + if lastError == nil { + w.Write([]byte("OK")) + } else { + w.WriteHeader(500) + w.Write([]byte((*lastError).Error())) + } + } + }) + http.HandleFunc("/profile", 
func(w http.ResponseWriter, req *http.Request) { + f, err := os.Create("mem.pprof") + if err != nil { + log.Fatal("could not create memory profile: ", err) + } + defer f.Close() + var m runtime.MemStats + runtime.ReadMemStats(&m) + log.Printf("Memory usage: Alloc=%v, TotalAlloc=%v, Sys=%v", m.Alloc, m.TotalAlloc, m.Sys) + log.Println(bimg.VipsMemory()) + bimg.VipsDebugInfo() + runtime.GC() // Trigger garbage collection + if err := pprof.WriteHeapProfile(f); err != nil { + log.Fatal("could not write memory profile: ", err) + } + }) + log.Println("starting server") + http.ListenAndServe(fmt.Sprintf(":%d", config.Port), nil) +} diff --git a/misc/bad-go-version/problematic_thing_2.go b/misc/bad-go-version/problematic_thing_2.go new file mode 100644 index 0000000..3f79685 --- /dev/null +++ b/misc/bad-go-version/problematic_thing_2.go @@ -0,0 +1,265 @@ +package main + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "mime/multipart" + "net/http" + "net/textproto" + "regexp" + "strings" + "time" + + "github.com/davidbyttow/govips/v2/vips" + "github.com/samber/lo" + "github.com/titanous/json5" +) + +const CALLBACK_REGEX string = ">AF_initDataCallback\\(({key: 'ds:1'.*?)\\);" + +type SegmentCoords struct { + x int + y int + w int + h int +} + +type Segment struct { + coords SegmentCoords + text string +} + +type ScanResult []Segment + +// TODO coordinates are negative sometimes and I think they shouldn't be +func rationalizeCoordsFormat1(imageW float64, imageH float64, centerXFraction float64, centerYFraction float64, widthFraction float64, heightFraction float64) SegmentCoords { + return SegmentCoords{ + x: int(math.Round((centerXFraction - widthFraction/2) * imageW)), + y: int(math.Round((centerYFraction - heightFraction/2) * imageH)), + w: int(math.Round(widthFraction * imageW)), + h: int(math.Round(heightFraction * imageH)), + } +} + +func scanImageChunk(image []byte, imageWidth int, imageHeight int) (ScanResult, error) { + var result ScanResult + timestamp := 
time.Now().UnixMicro() + var b bytes.Buffer + w := multipart.NewWriter(&b) + defer w.Close() + h := make(textproto.MIMEHeader) + h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="encoded_image"; filename="ocr%d.png"`, timestamp)) + h.Set("Content-Type", "image/png") + fw, err := w.CreatePart(h) + if err != nil { + return result, err + } + fw.Write(image) + w.Close() + + req, err := http.NewRequest("POST", fmt.Sprintf("https://lens.google.com/v3/upload?stcs=%d", timestamp), &b) + if err != nil { + return result, err + } + req.Header.Add("User-Agent", "Mozilla/5.0 (Linux; Android 13; RMX3771) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.144 Mobile Safari/537.36") + req.AddCookie(&http.Cookie{ + Name: "SOCS", + Value: "CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg", + }) + req.Header.Set("Content-Type", w.FormDataContentType()) + client := http.Client{} + res, err := client.Do(req) + if err != nil { + return result, err + } + defer res.Body.Close() + body, err := io.ReadAll(res.Body) + if err != nil { + return result, err + } + re, _ := regexp.Compile(CALLBACK_REGEX) + matches := re.FindStringSubmatch(string(body[:])) + if len(matches) == 0 { + return result, fmt.Errorf("invalid API response") + } + match := matches[1] + var lensObject map[string]interface{} + err = json5.Unmarshal([]byte(match), &lensObject) + if err != nil { + return result, err + } + + if _, ok := lensObject["errorHasStatus"]; ok { + return result, errors.New("lens failed") + } + + root := lensObject["data"].([]interface{}) + + var textSegments []string + var textRegions []SegmentCoords + + // I don't know why Google did this. + // Text segments are in one place and their locations are in another, using a very strange coordinate system. + // At least I don't need whatever is contained in the base64 parts (which I assume are protobufs). + // TODO: on a few images, this seems to not work for some reason. 
+ defer func() { + if r := recover(); r != nil { + // https://github.com/dimdenGD/chrome-lens-ocr/blob/main/src/core.js#L316 has code for a fallback text segment read mode. + // In testing, this proved unnecessary (quirks of the HTTP request? I don't know), and this only happens on textless images. + textSegments = []string{} + textRegions = []SegmentCoords{} + } + }() + + textSegmentsRaw := root[3].([]interface{})[4].([]interface{})[0].([]interface{})[0].([]interface{}) + textRegionsRaw := root[2].([]interface{})[3].([]interface{})[0].([]interface{}) + for _, x := range textRegionsRaw { + if strings.HasPrefix(x.([]interface{})[11].(string), "text:") { + rawCoords := x.([]interface{})[1].([]interface{}) + coords := rationalizeCoordsFormat1(float64(imageWidth), float64(imageHeight), rawCoords[0].(float64), rawCoords[1].(float64), rawCoords[2].(float64), rawCoords[3].(float64)) + textRegions = append(textRegions, coords) + } + } + for _, x := range textSegmentsRaw { + textSegment := x.(string) + textSegments = append(textSegments, textSegment) + } + + return lo.Map(lo.Zip2(textSegments, textRegions), func(x lo.Tuple2[string, SegmentCoords], _ int) Segment { + return Segment{ + text: x.A, + coords: x.B, + } + }), nil +} + +const MAX_DIM int = 1024 + +func scanImage(image *vips.ImageRef) (ScanResult, error) { + result := ScanResult{} + width := image.Width() + height := image.Height() + if width > MAX_DIM { + width = MAX_DIM + height = int(math.Round(float64(height) * (float64(width) / float64(image.Width())))) + } + downscaled, err := image.Copy() + if err != nil { + return result, err + } + downscaled.Resize(float64(width)/float64(image.Width()), vips.KernelLanczos3) + for y := 0; y < height; y += MAX_DIM { + chunkHeight := MAX_DIM + if y+chunkHeight > height { + chunkHeight = height - y + } + chunk, err := image.Copy() // TODO this really really should not be in-place + if err != nil { + return result, err + } + err = chunk.ExtractArea(0, y, width, height) + if err 
!= nil { + return result, err + } + buf, _, err := chunk.ExportPng(&vips.PngExportParams{}) + if err != nil { + return result, err + } + res, err := scanImageChunk(buf, width, chunkHeight) + if err != nil { + return result, err + } + for _, segment := range res { + result = append(result, Segment{ + text: segment.text, + coords: SegmentCoords{ + y: segment.coords.y + y, + x: segment.coords.x, + w: segment.coords.w, + h: segment.coords.h, + }, + }) + } + } + + return result, nil +} + +/* +async def scan_image_chunk(sess, image): + # send data to inscrutable undocumented Google service + # https://github.com/AuroraWright/owocr/blob/master/owocr/ocr.py#L193 + async with aiohttp.ClientSession() as sess: + data = aiohttp.FormData() + data.add_field( + "encoded_image", + encode_img(image), + filename="ocr" + str(timestamp) + ".png", + content_type="image/png" + ) + async with sess.post(url, headers=headers, cookies=cookies, data=data, timeout=10) as res: + body = await res.text() + + # I really worry about Google sometimes. This is not a sensible format. + match = CALLBACK_REGEX.search(body) + if match == None: + raise ValueError("Invalid callback") + + lens_object = pyjson5.loads(match.group(1)) + if "errorHasStatus" in lens_object: + raise RuntimeError("Lens failed") + + text_segments = [] + text_regions = [] + + root = lens_object["data"] + + # I don't know why Google did this. + # Text segments are in one place and their locations are in another, using a very strange coordinate system. + # At least I don't need whatever is contained in the base64 partss (which I assume are protobufs). + # TODO: on a few images, this seems to not work for some reason. 
+ try: + text_segments = root[3][4][0][0] + text_regions = [ rationalize_coords_format1(image.width, image.height, *x[1]) for x in root[2][3][0] if x[11].startswith("text:") ] + except (KeyError, IndexError): + # https://github.com/dimdenGD/chrome-lens-ocr/blob/main/src/core.js#L316 has code for a fallback text segment read mode. + # In testing, this proved unnecessary (quirks of the HTTP request? I don't know), and this only happens on textless images. + return [], [] + + return text_segments, text_regions + +MAX_SCAN_DIM = 1000 # not actually true but close enough +def chunk_image(image: Image): + chunks = [] + # Cut image down in X axis (I'm assuming images aren't too wide to scan in downscaled form because merging text horizontally would be annoying) + if image.width > MAX_SCAN_DIM: + image = image.resize((MAX_SCAN_DIM, round(image.height * (image.width / MAX_SCAN_DIM))), Image.LANCZOS) + for y in range(0, image.height, MAX_SCAN_DIM): + chunks.append(image.crop((0, y, image.width, min(y + MAX_SCAN_DIM, image.height)))) + return chunks + +async def scan_chunks(sess: aiohttp.ClientSession, chunks: [Image]): + # If text happens to be split across the cut line it won't get read. + # This is because doing overlap read areas would be really annoying. 
+ text = "" + regions = [] + for chunk in chunks: + new_segments, new_regions = await scan_image_chunk(sess, chunk) + for segment in new_segments: + text += segment + "\n" + for i, (segment, region) in enumerate(zip(new_segments, new_regions)): + regions.append({ **region, "y": region["y"] + (MAX_SCAN_DIM * i), "text": segment }) + return text, regions + +async def scan_image(sess: aiohttp.ClientSession, image: Image): + return await scan_chunks(sess, chunk_image(image)) + +if __name__ == "__main__": + async def main(): + async with aiohttp.ClientSession() as sess: + print(await scan_image(sess, Image.open("/data/public/memes-or-something/linear-algebra-chess.png"))) + asyncio.run(main()) +*/ diff --git a/mse.py b/mse.py index 36e1fba..782826b 100644 --- a/mse.py +++ b/mse.py @@ -12,8 +12,11 @@ import os import aiohttp_cors import json import io +import time import sys -from concurrent.futures import ProcessPoolExecutor +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path +import threading with open(sys.argv[1], "r") as config_file: CONFIG = json.load(config_file) @@ -21,26 +24,26 @@ with open(sys.argv[1], "r") as config_file: app = web.Application(client_max_size=32*1024**2) routes = web.RouteTableDef() -async def clip_server(query, unpack_buffer=True): - async with aiohttp.ClientSession() as sess: - async with sess.post(CONFIG["clip_server"], data=umsgpack.dumps(query)) as res: - response = umsgpack.loads(await res.read()) - if res.status == 200: - if unpack_buffer: - response = [ numpy.frombuffer(x, dtype="float16") for x in response ] - return response - else: - raise Exception(response if res.headers.get("content-type") == "application/msgpack" else (await res.text())) +async def clip_server(sess: aiohttp.ClientSession, query, unpack_buffer=True): + async with sess.post(CONFIG["clip_server"], data=umsgpack.dumps(query)) as res: + response = umsgpack.loads(await res.read()) + if res.status == 200: + if unpack_buffer: + response = [ 
async def run_ocr(self):
    """OCR every file whose OCR record is missing or older than the file.

    Does nothing unless the ``enable_ocr`` config key is truthy. Images are
    loaded and chunked on a thread pool, then scanned concurrently through
    ``ocr.scan_chunks`` using the shared HTTP session; each result is written
    to the ``ocr`` table and committed immediately. A failure on one file is
    reported and skipped so the remaining files are still processed.
    """
    if not CONFIG.get("enable_ocr"): return

    import ocr

    print("Running OCR")

    conn = await aiosqlite.connect(CONFIG["db_path"])
    try:
        unocred = await conn.execute_fetchall("SELECT files.filename FROM files LEFT JOIN ocr ON files.filename = ocr.filename WHERE ocr.scan_time IS NULL OR ocr.scan_time < files.modtime")

        ocr_sem = asyncio.Semaphore(20) # Google has more concurrency than our internal CLIP backend. I am sure they will be fine.
        load_sem = threading.Semaphore(100) # provide backpressure in loading to avoid using 50GB of RAM (this happened)

        async def run_image(filename, chunks):
            # Scan one already-chunked image and persist the result.
            try:
                text, regions = await ocr.scan_chunks(self.session, chunks)
                await conn.execute("INSERT OR REPLACE INTO ocr VALUES (?, ?, ?, ?)", (filename, time.time(), text, json.dumps(regions)))
                await conn.commit()
                sys.stdout.write(".")
                sys.stdout.flush()
            except Exception:
                # Exception (not a bare except) so task cancellation still propagates.
                print("OCR failed on", filename)
                traceback.print_exc()
            finally:
                ocr_sem.release()

        def load_and_chunk_image(filename):
            # Runs on a worker thread. The permit taken here is handed back by
            # the event loop once the image has been queued for scanning, or
            # right here if loading fails.
            load_sem.acquire()
            try:
                im = Image.open(Path(CONFIG["files"]) / filename)
                return filename, ocr.chunk_image(im)
            except Exception:
                load_sem.release()
                print("Could not load", filename)
                traceback.print_exc()
                return filename, None

        async with asyncio.TaskGroup() as tg:
            with ThreadPoolExecutor(max_workers=CONFIG.get("n_workers", 1)) as executor:
                for task in asyncio.as_completed([ asyncio.get_running_loop().run_in_executor(executor, load_and_chunk_image, file[0]) for file in unocred ]):
                    filename, chunks = await task
                    if chunks is None:
                        # Load failed; already reported and its permit already released.
                        continue
                    await ocr_sem.acquire()
                    tg.create_task(run_image(filename, chunks))
                    load_sem.release()
    finally:
        await conn.close()
files(filename), + scan_time INTEGER NOT NULL, + text TEXT NOT NULL, + raw_segments TEXT +); + +CREATE VIRTUAL TABLE IF NOT EXISTS ocr_fts USING fts5 ( + filename, + text, + tokenize='unicode61 remove_diacritics 2', + content='ocr' +); + +CREATE TRIGGER IF NOT EXISTS ocr_fts_ins AFTER INSERT ON ocr BEGIN + INSERT INTO ocr_fts (rowid, filename, text) VALUES (new.rowid, new.filename, new.text); +END; + +CREATE TRIGGER IF NOT EXISTS ocr_fts_del AFTER DELETE ON ocr BEGIN + INSERT INTO ocr_fts (ocr_fts, rowid, filename, text) VALUES ('delete', old.rowid, old.filename, old.text); +END; """) try: async with asyncio.TaskGroup() as tg: @@ -102,7 +165,7 @@ class Index: async def do_batch(batch): try: query = { "images": [ arg[2] for arg in batch ] } - embeddings = await clip_server(query, False) + embeddings = await clip_server(self.session, query, False) await conn.executemany("INSERT OR REPLACE INTO files VALUES (?, ?, ?)", [ (filename, modtime, embedding) for (filename, modtime, _), embedding in zip(batch, embeddings) ]) @@ -188,6 +251,8 @@ class Index: finally: await conn.close() + await self.run_ocr() + app.router.add_routes(routes) cors = aiohttp_cors.setup(app, defaults={ @@ -201,8 +266,8 @@ for route in list(app.router.routes()): cors.add(route) async def main(): + sess = aiohttp.ClientSession() while True: - async with aiohttp.ClientSession() as sess: try: async with await sess.get(CONFIG["clip_server"] + "config") as res: inference_server_config = umsgpack.unpackb(await res.read()) @@ -211,8 +276,9 @@ async def main(): except: traceback.print_exc() await asyncio.sleep(1) - index = Index(inference_server_config) + index = Index(inference_server_config, sess) app["index"] = index + app["session"] = sess await index.reload() print("Ready") if CONFIG.get("no_run_server", False): return diff --git a/mse_config.json b/mse_config.json index 254da66..07c3210 100644 --- a/mse_config.json +++ b/mse_config.json @@ -1,6 +1,9 @@ { - "clip_server": "http://localhost:1708/", - 
"db_path": "/srv/mse/data.sqlite3", + "clip_server": "http://100.64.0.10:1708", + "db_path": "data.sqlite3", "port": 1707, - "files": "/data/public/memes-or-something/" + "files": "/data/public/memes-or-something/", + "enable_ocr": false, + "thumbs_path": "./thumbtemp", + "enable_thumbs": false } \ No newline at end of file diff --git a/ocr.py b/ocr.py new file mode 100644 index 0000000..7c4f8c9 --- /dev/null +++ b/ocr.py @@ -0,0 +1,101 @@ +import pyjson5 +import re +import asyncio +import aiohttp +from PIL import Image +import time +import io + +CALLBACK_REGEX = re.compile(r">AF_initDataCallback\(({key: 'ds:1'.*?)\);") + +def encode_img(img): + image_bytes = io.BytesIO() + img.save(image_bytes, format="PNG", compress_level=6) + return image_bytes.getvalue() + +def rationalize_coords_format1(image_w, image_h, center_x_fraction, center_y_fraction, width_fraction, height_fraction, mysterious): + return { + "x": round((center_x_fraction - width_fraction / 2) * image_w), + "y": round((center_y_fraction - height_fraction / 2) * image_h), + "w": round(width_fraction * image_w), + "h": round(height_fraction * image_h) + } + +async def scan_image_chunk(sess, image): + timestamp = int(time.time() * 1000) + url = f"https://lens.google.com/v3/upload?stcs={timestamp}" + headers = {"User-Agent": "Mozilla/5.0 (Linux; Android 13; RMX3771) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.144 Mobile Safari/537.36"} + cookies = {"SOCS": "CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg"} + + # send data to inscrutable undocumented Google service + # https://github.com/AuroraWright/owocr/blob/master/owocr/ocr.py#L193 + async with aiohttp.ClientSession() as sess: + data = aiohttp.FormData() + data.add_field( + "encoded_image", + encode_img(image), + filename="ocr" + str(timestamp) + ".png", + content_type="image/png" + ) + async with sess.post(url, headers=headers, cookies=cookies, data=data, timeout=10) as res: + body = await res.text() + + # I really worry about Google sometimes. 
MAX_SCAN_DIM = 1000 # not actually true but close enough

def chunk_image(image: "Image.Image"):
    """Split an image into horizontal strips small enough to OCR.

    Images wider than MAX_SCAN_DIM are first downscaled, preserving aspect
    ratio, to exactly MAX_SCAN_DIM wide. The result is cut into strips of at
    most MAX_SCAN_DIM rows. Returns the strips in top-to-bottom order.
    """
    chunks = []
    # Cut image down in X axis (I'm assuming images aren't too wide to scan in downscaled form because merging text horizontally would be annoying)
    if image.width > MAX_SCAN_DIM:
        # Height shrinks by the same factor as the width; never let it round down to zero rows.
        new_height = max(1, round(image.height * (MAX_SCAN_DIM / image.width)))
        image = image.resize((MAX_SCAN_DIM, new_height), Image.LANCZOS)
    for y in range(0, image.height, MAX_SCAN_DIM):
        chunks.append(image.crop((0, y, image.width, min(y + MAX_SCAN_DIM, image.height))))
    return chunks

async def scan_chunks(sess: "aiohttp.ClientSession", chunks: "list[Image.Image]"):
    """OCR each chunk in order and merge the results.

    Returns ``(text, regions)``: ``text`` is every recognised segment followed
    by a newline, and ``regions`` is one dict per segment holding the region's
    coordinates, with ``y`` shifted by the combined height of the preceding
    chunks, plus the segment's ``text``.
    """
    # If text happens to be split across the cut line it won't get read.
    # This is because doing overlap read areas would be really annoying.
    text = ""
    regions = []
    y_offset = 0
    for chunk in chunks:
        new_segments, new_regions = await scan_image_chunk(sess, chunk)
        text += "".join(segment + "\n" for segment in new_segments)
        for segment, region in zip(new_segments, new_regions):
            # The offset depends on where the chunk sits in the image, not on the segment's index.
            regions.append({ **region, "y": region["y"] + y_offset, "text": segment })
        y_offset += chunk.height
    return text, regions

async def scan_image(sess: "aiohttp.ClientSession", image: "Image.Image"):
    """Chunk ``image`` and OCR it; see ``scan_chunks`` for the return value."""
    return await scan_chunks(sess, chunk_image(image))

if __name__ == "__main__":
    async def main():
        async with aiohttp.ClientSession() as sess:
            print(await scan_image(sess, Image.open("/data/public/memes-or-something/linear-algebra-chess.png")))
    asyncio.run(main())
/// SQL executed on every start-up to create the database objects if they are missing.
///
/// * `files` holds one row per indexed file: its embedding, OCR output and
///   thumbnail metadata, each with the time it was last produced.
/// * `ocr_fts` is an FTS5 external-content index over `files.filename` and
///   `files.ocr`, kept in step with `files` by the three triggers below.
///   NULL OCR text is indexed as the empty string on both the insert and the
///   delete side, so the values passed to the `'delete'` command always match
///   what was indexed.
///
/// Every statement uses `IF NOT EXISTS`, so a database created from an earlier
/// revision of this schema keeps its old `ocr_fts` definition.
/// NOTE(review): such databases presumably need `ocr_fts` dropped and rebuilt
/// once; confirm before deploying against existing data.
/// NOTE(review): `files` has no INTEGER PRIMARY KEY, so its rowids are implicit;
/// confirm nothing (e.g. VACUUM) renumbers them underneath the FTS index.
const SCHEMA: &str = r#"
CREATE TABLE IF NOT EXISTS files (
    filename TEXT NOT NULL PRIMARY KEY,
    embedding_time INTEGER,
    ocr_time INTEGER,
    thumbnail_time INTEGER,
    embedding BLOB,
    ocr TEXT,
    raw_ocr_segments BLOB,
    thumbnails BLOB
);

CREATE VIRTUAL TABLE IF NOT EXISTS ocr_fts USING fts5 (
    filename,
    ocr,
    tokenize='unicode61 remove_diacritics 2',
    content='files'
);

CREATE TRIGGER IF NOT EXISTS ocr_fts_ins AFTER INSERT ON files BEGIN
    INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
END;

CREATE TRIGGER IF NOT EXISTS ocr_fts_del AFTER DELETE ON files BEGIN
    INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
END;

CREATE TRIGGER IF NOT EXISTS ocr_fts_upd AFTER UPDATE ON files BEGIN
    INSERT INTO ocr_fts (ocr_fts, rowid, filename, ocr) VALUES ('delete', old.rowid, old.filename, COALESCE(old.ocr, ''));
    INSERT INTO ocr_fts (rowid, filename, ocr) VALUES (new.rowid, new.filename, COALESCE(new.ocr, ''));
END;
"#;
path)) + .header("Content-Type", "application/msgpack") + .body(rmp_serde::to_vec_named(&data)?) + .send() + .await?; + let result: O = rmp_serde::from_slice(&response.bytes().await?)?; + Ok(result) +} + +#[derive(Debug)] +struct LoadedImage { + image: Arc, + filename: String, + original_size: usize, +} + +#[derive(Debug)] +struct EmbeddingInput { + image: Vec, + filename: String, +} + +#[derive(Debug, Serialize)] +#[serde(untagged)] +enum EmbeddingRequest { + Images { images: Vec }, + Text { text: Vec } +} + +fn timestamp() -> i64 { + chrono::Utc::now().timestamp_micros() +} + +#[derive(Debug, Clone)] +struct ImageFormatConfig { + target_width: u32, + target_filesize: u32, + quality: u8, + format: ImageFormat, + extension: String, +} + +fn generate_filename_hash(filename: &str) -> String { + use std::hash::{Hash, Hasher}; + let mut hasher = fnv::FnvHasher::default(); + filename.hash(&mut hasher); + BASE64_URL_SAFE_NO_PAD.encode(hasher.finish().to_le_bytes()) +} + +fn generate_thumbnail_filename( + filename: &str, + format_name: &str, + format_config: &ImageFormatConfig, +) -> String { + format!( + "{}{}.{}", + generate_filename_hash(filename), + format_name, + format_config.extension + ) +} + +async fn initialize_database(config: &Config) -> Result { + let connection_options = SqliteConnectOptions::new() + .filename(&config.db_path) + .create_if_missing(true); + let pool = SqlitePool::connect_with(connection_options).await?; + sqlx::query(SCHEMA).execute(&pool).await?; + Ok(pool) +} + +fn image_formats(_config: &Config) -> HashMap { + let mut formats = HashMap::new(); + formats.insert( + "jpegl".to_string(), + ImageFormatConfig { + target_width: 800, + target_filesize: 0, + quality: 70, + format: ImageFormat::Jpeg, + extension: "jpg".to_string(), + }, + ); + formats.insert( + "jpegh".to_string(), + ImageFormatConfig { + target_width: 1600, + target_filesize: 0, + quality: 80, + format: ImageFormat::Jpeg, + extension: "jpg".to_string(), + }, + ); + formats.insert( 
+ "jpeg256kb".to_string(), + ImageFormatConfig { + target_width: 500, + target_filesize: 256000, + quality: 0, + format: ImageFormat::Jpeg, + extension: "jpg".to_string(), + }, + ); + formats.insert( + "avifh".to_string(), + ImageFormatConfig { + target_width: 1600, + target_filesize: 0, + quality: 80, + format: ImageFormat::Avif, + extension: "avif".to_string(), + }, + ); + formats.insert( + "avifl".to_string(), + ImageFormatConfig { + target_width: 800, + target_filesize: 0, + quality: 30, + format: ImageFormat::Avif, + extension: "avif".to_string(), + }, + ); + formats +} + +async fn resize_for_embed(backend_config: Arc, image: Arc) -> Result> { + let resized = tokio::task::spawn_blocking(move || { + let new = image.resize( + backend_config.image_size.0, + backend_config.image_size.1, + FilterType::Lanczos3 + ); + let mut buf = Vec::new(); + let mut csr = Cursor::new(&mut buf); + new.write_to(&mut csr, ImageFormat::Png)?; + Ok::, anyhow::Error>(buf) + }).await??; + Ok(resized) +} + +async fn ingest_files(config: Arc, backend: Arc) -> Result<()> { + let pool = initialize_database(&config).await?; + let client = Client::new(); + + let formats = image_formats(&config); + + let (to_process_tx, to_process_rx) = mpsc::channel::(100); + let (to_embed_tx, to_embed_rx) = mpsc::channel(backend.batch as usize); + let (to_thumbnail_tx, to_thumbnail_rx) = mpsc::channel(30); + let (to_ocr_tx, to_ocr_rx) = mpsc::channel(30); + + let cpus = num_cpus::get(); + + // Image loading and preliminary resizing + let image_loading: JoinHandle> = tokio::spawn({ + let config = config.clone(); + let backend = backend.clone(); + let stream = ReceiverStream::new(to_process_rx).map(Ok); + stream.try_for_each_concurrent(Some(cpus), move |record| { + let config = config.clone(); + let backend = backend.clone(); + let to_embed_tx = to_embed_tx.clone(); + let to_thumbnail_tx = to_thumbnail_tx.clone(); + let to_ocr_tx = to_ocr_tx.clone(); + async move { + let path = 
Path::new(&config.files).join(&record.filename); + let image: Result> = tokio::task::block_in_place(|| Ok(Arc::new(ImageReader::open(&path)?.with_guessed_format()?.decode()?))); + let image = match image { + Ok(image) => image, + Err(e) => { + log::error!("Could not read {}: {}", record.filename, e); + return Ok(()) + } + }; + if record.embedding.is_none() { + let resized = resize_for_embed(backend.clone(), image.clone()).await?; + + to_embed_tx.send(EmbeddingInput { image: resized, filename: record.filename.clone() }).await? + } + if record.thumbnails.is_none() && config.enable_thumbs { + to_thumbnail_tx + .send(LoadedImage { + image: image.clone(), + filename: record.filename.clone(), + original_size: std::fs::metadata(&path)?.len() as usize, + }) + .await?; + } + if record.raw_ocr_segments.is_none() && config.enable_ocr { + to_ocr_tx + .send(LoadedImage { + image, + filename: record.filename.clone(), + original_size: 0, + }) + .await?; + } + Ok(()) + } + }) + }); + + // Thumbnail generation + let thumbnail_generation: Option>> = if config.enable_thumbs { + let config = config.clone(); + let pool = pool.clone(); + let stream = ReceiverStream::new(to_thumbnail_rx).map(Ok); + let formats = Arc::new(formats); + Some(tokio::spawn({ + stream.try_for_each_concurrent(Some(cpus), move |image| { + use image::codecs::*; + + let formats = formats.clone(); + let config = config.clone(); + let pool = pool.clone(); + async move { + let filename = image.filename.clone(); + log::debug!("thumbnailing {}", filename); + let generated_formats = tokio::task::spawn_blocking(move || { + let mut generated_formats = Vec::new(); + let rgb = DynamicImage::from(image.image.to_rgb8()); + for (format_name, format_config) in &*formats { + let resized = if format_config.target_filesize != 0 { + let mut lb = 1; + let mut ub = 100; + loop { + let quality = (lb + ub) / 2; + let thumbnail = rgb.resize( + format_config.target_width, + u32::MAX, + FilterType::Lanczos3, + ); + let mut buf: Vec = 
Vec::new(); + let mut csr = Cursor::new(&mut buf); + // this is ugly but I don't actually know how to fix it (cannot factor it out due to issues with dyn Trait) + match format_config.format { + ImageFormat::Avif => thumbnail.write_with_encoder(avif::AvifEncoder::new_with_speed_quality(&mut csr, 4, quality)), + ImageFormat::Jpeg => thumbnail.write_with_encoder(jpeg::JpegEncoder::new_with_quality(&mut csr, quality)), + _ => unimplemented!() + }?; + if buf.len() > image.original_size { + ub = quality; + } else { + lb = quality + 1; + } + if lb >= ub { + break buf; + } + } + } else { + let thumbnail = rgb.resize( + format_config.target_width, + u32::MAX, + FilterType::Lanczos3, + ); + let mut buf: Vec = Vec::new(); + let mut csr = Cursor::new(&mut buf); + match format_config.format { + ImageFormat::Avif => thumbnail.write_with_encoder(avif::AvifEncoder::new_with_speed_quality(&mut csr, 4, format_config.quality)), + ImageFormat::Jpeg => thumbnail.write_with_encoder(jpeg::JpegEncoder::new_with_quality(&mut csr, format_config.quality)), + ImageFormat::WebP => thumbnail.write_with_encoder(webp::WebPEncoder::new_lossless(&mut csr)), + _ => unimplemented!() + }?; + buf + }; + if resized.len() < image.original_size { + generated_formats.push(format_name.clone()); + let thumbnail_path = Path::new(&config.thumbs_path).join( + generate_thumbnail_filename( + &image.filename, + format_name, + format_config, + ), + ); + std::fs::write(thumbnail_path, resized)?; + } + } + Ok::, anyhow::Error>(generated_formats) + }).await??; + let formats_data = rmp_serde::to_vec(&generated_formats)?; + let ts = timestamp(); + sqlx::query!( + "UPDATE files SET thumbnails = ?, thumbnail_time = ? 
WHERE filename = ?", + formats_data, + ts, + filename + ) + .execute(&pool) + .await?; + Ok(()) + } + }) + })) + } else { + None + }; + + // OCR + let ocr: Option>> = if config.enable_ocr { + let client = client.clone(); + let pool = pool.clone(); + let stream = ReceiverStream::new(to_ocr_rx).map(Ok); + Some(tokio::spawn({ + stream.try_for_each_concurrent(Some(config.ocr_concurrency), move |image| { + let client = client.clone(); + let pool = pool.clone(); + async move { + log::debug!("OCRing {}", image.filename); + let scan = match scan_image(&client, &image.image).await { + Ok(scan) => scan, + Err(e) => { + log::error!("OCR failure {}: {}", image.filename, e); + return Ok(()) + } + }; + let ocr_text = scan + .iter() + .map(|segment| segment.text.clone()) + .collect::>() + .join("\n"); + let ocr_data = rmp_serde::to_vec(&scan)?; + let ts = timestamp(); + sqlx::query!( + "UPDATE files SET ocr = ?, raw_ocr_segments = ?, ocr_time = ? WHERE filename = ?", + ocr_text, + ocr_data, + ts, + image.filename + ) + .execute(&pool) + .await?; + Ok(()) + } + }) + })) + } else { + None + }; + + let embedding_generation: JoinHandle> = tokio::spawn({ + let stream = ReceiverStream::new(to_embed_rx).chunks(backend.batch); + let client = client.clone(); + let config = config.clone(); + let pool = pool.clone(); + // keep multiple embedding requests in flight + stream.map(Ok).try_for_each_concurrent(Some(3), move |batch| { + let client = client.clone(); + let config = config.clone(); + let pool = pool.clone(); + async move { + let result: Vec = query_clip_server( + &client, + &config, + "", + EmbeddingRequest::Images { + images: batch.iter().map(|input| serde_bytes::ByteBuf::from(input.image.clone())).collect(), + }, + ).await.context("querying CLIP server")?; + + let mut tx = pool.begin().await?; + let ts = timestamp(); + for (i, vector) in result.into_iter().enumerate() { + let vector = vector.into_vec(); + log::debug!("embedded {}", batch[i].filename); + sqlx::query!( + "UPDATE 
files SET embedding_time = ?, embedding = ? WHERE filename = ?", + ts, + vector, + batch[i].filename + ) + .execute(&mut *tx) + .await?; + } + tx.commit().await?; + anyhow::Result::Ok(()) + } + }) + }); + + let mut filenames = HashMap::new(); + + // blocking OS calls + tokio::task::block_in_place(|| -> anyhow::Result<()> { + for entry in WalkDir::new(config.files.as_str()) { + let entry = entry?; + let path = entry.path(); + if path.is_file() { + let filename = path.strip_prefix(&config.files)?.to_str().unwrap().to_string(); + let modtime = entry.metadata()?.modified()?.duration_since(std::time::UNIX_EPOCH)?; + let modtime = modtime.as_micros() as i64; + filenames.insert(filename.clone(), (path.to_path_buf(), modtime)); + } + } + Ok(()) + })?; + + log::debug!("finished reading filenames"); + + for (filename, (_path, modtime)) in filenames.iter() { + let modtime = *modtime; + let record = sqlx::query_as!(FileRecord, "SELECT * FROM files WHERE filename = ?", filename) + .fetch_optional(&pool) + .await?; + + let new_record = match record { + None => Some(FileRecord { + filename: filename.clone(), + ..Default::default() + }), + Some(r) if modtime > r.embedding_time.unwrap_or(i64::MIN) || (modtime > r.ocr_time.unwrap_or(i64::MIN) && config.enable_ocr) || (modtime > r.thumbnail_time.unwrap_or(i64::MIN) && config.enable_thumbs) => { + Some(r) + }, + _ => None + }; + if let Some(mut record) = new_record { + log::debug!("processing {}", record.filename); + sqlx::query!("INSERT OR IGNORE INTO files (filename) VALUES (?)", filename) + .execute(&pool) + .await?; + if modtime > record.embedding_time.unwrap_or(i64::MIN) { + record.embedding = None; + } + if modtime > record.ocr_time.unwrap_or(i64::MIN) { + record.raw_ocr_segments = None; + } + if modtime > record.thumbnail_time.unwrap_or(i64::MIN) { + record.thumbnails = None; + } + // we need to exit here to actually capture the error + if !to_process_tx.send(record).await.is_ok() { + break + } + } + } + + drop(to_process_tx); 
+ + embedding_generation.await?.context("generating embeddings")?; + + if let Some(thumbnail_generation) = thumbnail_generation { + thumbnail_generation.await?.context("generating thumbnails")?; + } + + if let Some(ocr) = ocr { + ocr.await?.context("OCRing")?; + } + + image_loading.await?.context("loading images")?; + + let stored: Vec = sqlx::query_scalar("SELECT filename FROM files").fetch_all(&pool).await?; + let mut tx = pool.begin().await?; + for filename in stored { + if !filenames.contains_key(&filename) { + sqlx::query!("DELETE FROM files WHERE filename = ?", filename) + .execute(&mut *tx) + .await?; + } + } + tx.commit().await?; + + log::info!("ingest done"); + + Result::Ok(()) +} + +const INDEX_ADD_BATCH: usize = 512; + +async fn build_index(config: Arc, backend: Arc) -> Result { + let pool = initialize_database(&config).await?; + + let mut index = IIndex { + // Use a suitable vector similarity search library for Rust + vectors: faiss::index_factory(backend.embedding_size as u32, "SQfp16", faiss::MetricType::InnerProduct)?, + filenames: Vec::new(), + format_codes: Vec::new(), + format_names: Vec::new(), + }; + + let count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM files") + .fetch_one(&pool) + .await?; + + index.filenames = Vec::with_capacity(count as usize); + index.format_codes = Vec::with_capacity(count as usize); + let mut buffer = Vec::with_capacity(INDEX_ADD_BATCH * backend.embedding_size as usize); + index.format_names = Vec::with_capacity(5); + + let mut rows = sqlx::query_as::<_, FileRecord>("SELECT * FROM files").fetch(&pool); + while let Some(record) = rows.try_next().await? 
{ + if let Some(emb) = record.embedding { + index.filenames.push(record.filename); + for i in (0..emb.len()).step_by(2) { + buffer.push( + half::f16::from_le_bytes([emb[i], emb[i + 1]]) + .to_f32(), + ); + } + if buffer.len() == buffer.capacity() { + index.vectors.add(&buffer)?; + buffer.clear(); + } + + let mut formats: Vec = Vec::new(); + if let Some(t) = record.thumbnails { + formats = rmp_serde::from_slice(&t)?; + } + + let mut format_code = 0; + for format_string in &formats { + let mut found = false; + for (i, name) in index.format_names.iter().enumerate() { + if name == format_string { + format_code |= 1 << i; + found = true; + break; + } + } + if !found { + let new_index = index.format_names.len(); + format_code |= 1 << new_index; + index.format_names.push(format_string.clone()); + } + } + index.format_codes.push(format_code); + } + } + if !buffer.is_empty() { + index.vectors.add(&buffer)?; + } + + Ok(index) +} + +fn decode_fp16_buffer(buf: &[u8]) -> Vec { + buf.chunks_exact(2) + .map(|chunk| half::f16::from_le_bytes([chunk[0], chunk[1]]).to_f32()) + .collect() +} + +type EmbeddingVector = Vec; + +#[derive(Debug, Serialize)] +struct QueryResult { + matches: Vec<(f32, String, String, u64)>, + formats: Vec, + extensions: HashMap, +} + +#[derive(Debug, Deserialize)] +struct QueryTerm { + embedding: Option, + image: Option, + text: Option, + weight: Option, +} + +#[derive(Debug, Deserialize)] +struct QueryRequest { + terms: Vec, + k: Option, +} + +async fn query_index(index: &mut IIndex, query: EmbeddingVector, k: usize) -> Result { + let result = index.vectors.search(&query, k as usize)?; + + let items = result.distances + .into_iter() + .zip(result.labels) + .filter_map(|(distance, id)| { + let id = id.get()? 
as usize; + Some(( + distance, + index.filenames[id].clone(), + generate_filename_hash(&index.filenames[id as usize]).clone(), + index.format_codes[id] + )) + }) + .collect(); + + Ok(QueryResult { + matches: items, + formats: index.format_names.clone(), + extensions: HashMap::new(), + }) +} + +async fn handle_request( + config: &Config, + backend_config: Arc, + client: Arc, + index: &mut IIndex, + req: Json, +) -> Result> { + let mut total_embedding = ndarray::Array::from(vec![0.0; backend_config.embedding_size]); + + let mut image_batch = Vec::new(); + let mut image_weights = Vec::new(); + let mut text_batch = Vec::new(); + let mut text_weights = Vec::new(); + + for term in &req.terms { + if let Some(image) = &term.image { + let bytes = BASE64_STANDARD.decode(image)?; + let image = Arc::new(tokio::task::block_in_place(|| image::load_from_memory(&bytes))?); + image_batch.push(serde_bytes::ByteBuf::from(resize_for_embed(backend_config.clone(), image).await?)); + image_weights.push(term.weight.unwrap_or(1.0)); + } + if let Some(text) = &term.text { + text_batch.push(text.clone()); + text_weights.push(term.weight.unwrap_or(1.0)); + } + if let Some(embedding) = &term.embedding { + let weight = term.weight.unwrap_or(1.0); + for (i, value) in embedding.iter().enumerate() { + total_embedding[i] += value * weight; + } + } + } + + let mut batches = vec![]; + + if !image_batch.is_empty() { + batches.push( + EmbeddingRequest::Images { + images: image_batch + } + ); + } + if !text_batch.is_empty() { + batches.push( + EmbeddingRequest::Text { + text: text_batch, + } + ); + } + + for batch in batches { + let embs: Vec> = query_clip_server(&client, config, "/", batch).await?; + for emb in embs { + total_embedding += &ndarray::Array::from_vec(decode_fp16_buffer(&emb)); + } + } + + let k = req.k.unwrap_or(1000); + let qres = query_index(index, total_embedding.to_vec(), k).await?; + + let mut extensions = HashMap::new(); + for (k, v) in image_formats(config) { + extensions.insert(k, 
v.extension); + } + + Ok(Json(QueryResult { + matches: qres.matches, + formats: qres.formats, + extensions, + }).into_response()) +} + +async fn get_backend_config(config: &Config) -> Result { + let res = Client::new().get(&format!("{}/config", config.clip_server)).send().await?; + Ok(rmp_serde::from_slice(&res.bytes().await?)?) +} + +#[tokio::main] +async fn main() -> Result<()> { + pretty_env_logger::init(); + + let config_path = std::env::args().nth(1).expect("Missing config file path"); + let config: Arc = Arc::new(serde_json::from_slice(&std::fs::read(config_path)?)?); + + let pool = initialize_database(&config).await?; + sqlx::query(SCHEMA).execute(&pool).await?; + + let backend = Arc::new(loop { + match get_backend_config(&config).await { + Ok(backend) => break backend, + Err(e) => { + log::error!("Backend failed (fetch): {}", e); + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + } + } + }); + + if config.no_run_server { + ingest_files(config.clone(), backend.clone()).await?; + return Ok(()) + } + + let (request_ingest_tx, mut request_ingest_rx) = mpsc::channel(1); + + let index = Arc::new(tokio::sync::Mutex::new(build_index(config.clone(), backend.clone()).await?)); + + let (ingest_done_tx, _ingest_done_rx) = broadcast::channel(1); + let done_tx = Arc::new(ingest_done_tx.clone()); + + let _ingest_task = tokio::spawn({ + let config = config.clone(); + let backend = backend.clone(); + let index = index.clone(); + async move { + loop { + log::info!("Ingest running"); + match ingest_files(config.clone(), backend.clone()).await { + Ok(_) => { + match build_index(config.clone(), backend.clone()).await { + Ok(new_index) => { + *index.lock().await = new_index; + } + Err(e) => { + log::error!("Index build failed: {:?}", e); + ingest_done_tx.send((false, format!("{:?}", e))).unwrap(); + } + } + } + Err(e) => { + log::error!("Ingest failed: {:?}", e); + ingest_done_tx.send((false, format!("{:?}", e))).unwrap(); + } + } + ingest_done_tx.send((true, 
format!("OK"))).unwrap(); + request_ingest_rx.recv().await; + } + } + }); + + let cors = CorsLayer::permissive(); + + let config_ = config.clone(); + let client = Arc::new(Client::new()); + let app = Router::new() + .route("/", post(|req| async move { + let config = config.clone(); + let backend_config = backend.clone(); + let mut index = index.lock().await; // TODO: use ConcurrentIndex here + let client = client.clone(); + handle_request(&config, backend_config, client.clone(), &mut index, req).await.map_err(|e| format!("{:?}", e)) + })) + .route("/", get(|_req: axum::http::Request| async move { + "OK" + })) + .route("/reload", post(|_req: axum::http::Request| async move { + log::info!("Requesting index reload"); + let mut done_rx = done_tx.clone().subscribe(); + let _ = request_ingest_tx.send(()).await; // ignore possible error, which is presumably because the queue is full + match done_rx.recv().await { + Ok((true, status)) => { + let mut res = status.into_response(); + *res.status_mut() = StatusCode::OK; + res + }, + Ok((false, status)) => { + let mut res = status.into_response(); + *res.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; + res + }, + Err(_) => { + let mut res = "internal error".into_response(); + *res.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; + res + } + } + })) + .layer(cors); + + let addr = format!("0.0.0.0:{}", config_.port); + log::info!("Starting server on {}", addr); + let listener = tokio::net::TcpListener::bind(&addr).await.unwrap(); + axum::serve(listener, app).await?; + + Ok(()) +} \ No newline at end of file diff --git a/src/ocr.rs b/src/ocr.rs new file mode 100644 index 0000000..90e523e --- /dev/null +++ b/src/ocr.rs @@ -0,0 +1,173 @@ +use anyhow::{anyhow, Result}; +use image::{DynamicImage, GenericImageView, ImageFormat}; +use regex::Regex; +use reqwest::{ + header::{HeaderMap, HeaderValue}, + multipart::{Form, Part}, + Client, +}; +use serde_json::Value; +use std::{io::Cursor, time::{SystemTime, UNIX_EPOCH}}; +use 
serde::{Deserialize, Serialize}; + +const CALLBACK_REGEX: &str = r">AF_initDataCallback\((\{key: 'ds:1'.*?\})\);"; +const MAX_DIM: u32 = 1024; + +#[derive(Debug, Serialize, Deserialize)] +pub struct SegmentCoords { + pub x: i32, + pub y: i32, + pub w: i32, + pub h: i32, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct Segment { + pub coords: SegmentCoords, + pub text: String, +} + +pub type ScanResult = Vec; + +fn rationalize_coords_format1( + image_w: f64, + image_h: f64, + center_x_fraction: f64, + center_y_fraction: f64, + width_fraction: f64, + height_fraction: f64, +) -> SegmentCoords { + SegmentCoords { + x: ((center_x_fraction - width_fraction / 2.0) * image_w).round() as i32, + y: ((center_y_fraction - height_fraction / 2.0) * image_h).round() as i32, + w: (width_fraction * image_w).round() as i32, + h: (height_fraction * image_h).round() as i32, + } +} + +async fn scan_image_chunk( + client: &Client, + image: &[u8], + image_width: u32, + image_height: u32, +) -> Result { + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_micros(); + + let part = Part::bytes(image.to_vec()) + .file_name(format!("ocr{}.png", timestamp)) + .mime_str("image/png")?; + + let form = Form::new().part("encoded_image", part); + + let mut headers = HeaderMap::new(); + headers.insert( + "User-Agent", + HeaderValue::from_static("Mozilla/5.0 (Linux; Android 13; RMX3771) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.144 Mobile Safari/537.36"), + ); + headers.insert("Cookie", HeaderValue::from_str(&format!("SOCS=CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg; stcs={}", timestamp))?); + + let response = client + .post(&format!("https://lens.google.com/v3/upload?stcs={}", timestamp)) + .multipart(form) + .headers(headers) + .send() + .await?; + + let body = response.text().await?; + + let re = Regex::new(CALLBACK_REGEX)?; + let captures = re + .captures(&body) + .ok_or_else(|| anyhow!("invalid API response"))?; + let match_str = 
captures.get(1).unwrap().as_str(); + + let lens_object: Value = json5::from_str(match_str)?; + + if lens_object.get("errorHasStatus").is_some() { + return Err(anyhow!("lens failed")); + } + + let root = lens_object["data"].as_array().unwrap(); + + let mut text_segments = Vec::new(); + let mut text_regions = Vec::new(); + + let text_segments_raw = root[3][4][0][0] + .as_array() + .ok_or_else(|| anyhow!("invalid text segments"))?; + let text_regions_raw = root[2][3][0] + .as_array() + .ok_or_else(|| anyhow!("invalid text regions"))?; + + for region in text_regions_raw { + let region_data = region.as_array().unwrap(); + if region_data[11].as_str().unwrap().starts_with("text:") { + let raw_coords = region_data[1].as_array().unwrap(); + let coords = rationalize_coords_format1( + image_width as f64, + image_height as f64, + raw_coords[0].as_f64().unwrap(), + raw_coords[1].as_f64().unwrap(), + raw_coords[2].as_f64().unwrap(), + raw_coords[3].as_f64().unwrap(), + ); + text_regions.push(coords); + } + } + + for segment in text_segments_raw { + let text_segment = segment.as_str().unwrap().to_string(); + text_segments.push(text_segment); + } + + Ok(text_segments + .into_iter() + .zip(text_regions.into_iter()) + .map(|(text, coords)| Segment { text, coords }) + .collect()) +} + +pub async fn scan_image(client: &Client, image: &DynamicImage) -> Result { + let mut result = ScanResult::new(); + let (width, height) = image.dimensions(); + + let (width, height, image) = if width > MAX_DIM { + let height = ((height as f64) * (MAX_DIM as f64) / (width as f64)).round() as u32; + let new_image = tokio::task::block_in_place(|| image.resize_exact(MAX_DIM, height, image::imageops::FilterType::Lanczos3)); + (MAX_DIM, height, std::borrow::Cow::Owned(new_image)) + } else { + (width, height, std::borrow::Cow::Borrowed(image)) + }; + + let mut y = 0; + while y < height { + let chunk_height = (height - y).min(MAX_DIM); + let chunk = tokio::task::block_in_place(|| { + let chunk = image.view(0, 
y, width, chunk_height).to_image(); + let mut buf = Vec::new(); + let mut csr = Cursor::new(&mut buf); + chunk.write_to(&mut csr, ImageFormat::Png)?; + Ok::, anyhow::Error>(buf) + })?; + + let res = scan_image_chunk(client, &chunk, width, chunk_height).await?; + for segment in res { + result.push(Segment { + text: segment.text, + coords: SegmentCoords { + y: segment.coords.y + y as i32, + x: segment.coords.x, + w: segment.coords.w, + h: segment.coords.h, + }, + }); + } + + y += chunk_height; + } + + Ok(result) +} \ No newline at end of file