From 7fa14d45ae4da793812c6f25400614dda3a835d2 Mon Sep 17 00:00:00 2001 From: osmarks Date: Sat, 2 Nov 2024 19:38:05 +0000 Subject: [PATCH] improve observability and fix up Reddit dump for full-scale run --- Cargo.lock | 1359 +++++++++++++++++++++++++---------------- Cargo.toml | 15 +- src/common.rs | 12 +- src/dump_processor.rs | 53 ++ src/main.rs | 579 +++++++++--------- src/ocr.rs | 7 +- src/reddit_dump.rs | 260 +++++--- 7 files changed, 1394 insertions(+), 891 deletions(-) create mode 100644 src/dump_processor.rs diff --git a/Cargo.lock b/Cargo.lock index 8ea0680..df81f11 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "addr2line" -version = "0.21.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "gimli", ] @@ -17,6 +17,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "ahash" version = "0.8.11" @@ -68,9 +74,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "37bf3594c4c988a53154954629820791dde498571819ae4ca50ca811e060cc95" [[package]] name = "arbitrary" @@ -86,30 +92,57 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] -[[package]] -name = "arrayref" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" - [[package]] name = "arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] -name = "async-trait" -version = "0.1.80" +name = "async-recursion" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", +] + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", ] [[package]] @@ -129,15 +162,15 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "av-data" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75b98a3525d00f920df9a2d44cc99b9cc5b7dc70d7fbb612cd755270dbe6552" +checksum = "124ae24335161b3d2226594640a67903da0866e2591312591fc8ddad64c1b38c" dependencies = [ "byte-slice-cast", "bytes", @@ -163,18 +196,18 @@ dependencies = [ [[package]] name = "avif-serialize" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "876c75a42f6364451a033496a14c44bffe41f5f4a8236f697391f11024e596d2" +checksum = "e335041290c43101ca215eed6f43ec437eb5a42125573f600fc3fa42b9bddd62" dependencies = [ "arrayvec", ] [[package]] name = "axum" -version = "0.7.5" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +checksum = "504e3947307ac8326a5437504c517c4b56716c9d98fac0028c2acc7ca47d70ae" dependencies = [ "async-trait", "axum-core", @@ -198,7 +231,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper 1.0.1", "tokio", - "tower", + "tower 0.5.1", "tower-layer", "tower-service", "tracing", @@ -206,9 +239,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" dependencies = [ "async-trait", "bytes", @@ -219,7 +252,7 @@ dependencies = [ "mime", "pin-project-lite", "rustversion", - "sync_wrapper 0.1.2", + "sync_wrapper 1.0.1", "tower-layer", "tower-service", "tracing", @@ -227,17 +260,17 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.71" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.8.0", "object", "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] @@ -260,14 +293,14 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "bindgen" -version = "0.69.4" +version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "cexpr", "clang-sys", - "itertools", + "itertools 0.12.1", "lazy_static", "lazycell", "proc-macro2", @@ -275,7 +308,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] @@ -292,27 +325,27 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" dependencies = [ "serde", ] [[package]] name = "bitreader" -version = "0.3.8" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdd859c9d97f7c468252795b35aeccc412bdbb1e90ee6969c4fa6328272eaeff" +checksum = "b1123451be0f8ada15415fbb4e0fe50775be531f61a432c18168a4f96e470ce1" dependencies = [ "cfg-if", ] [[package]] name = "bitstream-io" -version = "2.3.0" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c12d1856e42f0d817a835fe55853957c85c8c8a470114029143d3f12671446e" +checksum = "b81e1519b0d82120d2fd469d5bfb2919a9361c48b02d82d04befc1cdd2002452" [[package]] name = "block-buffer" @@ -325,9 +358,9 @@ dependencies = [ [[package]] name = "built" -version = "0.7.2" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41bfbdb21256b87a8b5e80fab81a8eed158178e812fd7ba451907518b2742f16" +checksum = "c360505aed52b7ec96a3636c3f039d99103c37d1d9b4f7a8c743d3ea9ffcd03b" [[package]] name = "bumpalo" @@ -343,9 +376,9 @@ checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c" [[package]] name = "bytemuck" -version = "1.16.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" [[package]] name = "byteorder" @@ -361,28 +394,28 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" [[package]] name = "castaway" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" dependencies = [ "rustversion", ] [[package]] name = "cc" -version = "1.0.98" +version = "1.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" dependencies = [ "jobserver", "libc", - "once_cell", + "shlex", ] [[package]] @@ -421,14 +454,14 @@ dependencies = [ "js-sys", "num-traits", "wasm-bindgen", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] name = "clang-sys" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a483f3cbf7cec2e153d424d0e92329d816becc6421389bd494375c6065921b9b" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", "libc", @@ -443,9 +476,9 @@ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" [[package]] name = "compact_str" -version = "0.8.0-beta" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a2dc81369dde6d31456eedbb4fd3d320f0b9713573dfe06e569e2bce7607f2" +checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644" dependencies = [ "castaway", "cfg-if", @@ -456,6 +489,45 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "console-api" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ed14aa9c9f927213c6e4f3ef75faaad3406134efe84ba2cb7983431d5f0931" +dependencies = [ + "futures-core", + "prost", + "prost-types", + "tonic", + "tracing-core", +] + +[[package]] +name = "console-subscriber" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e3a111a37f3333946ebf9da370ba5c5577b18eb342ec683eb488dd21980302" +dependencies = [ + "console-api", + "crossbeam-channel", + "crossbeam-utils", + "futures-task", + "hdrhistogram", + "humantime", + "hyper-util", + "prost", + "prost-types", + "serde", + "serde_json", + "thread_local", + "tokio", + "tokio-stream", + "tonic", + "tracing", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -474,15 +546,15 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" dependencies = [ "libc", ] @@ -504,13 +576,22 @@ checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -568,7 +649,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d4b54a40baf633a71c6f0fb49494a7e4ee7bc26f3e727212b6cb915aa1ea1e1" dependencies = [ "av-data", - "bitflags 2.5.0", + "bitflags 2.6.0", "dav1d-sys", "static_assertions", ] @@ -624,9 +705,9 @@ checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" [[package]] name = "either" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" dependencies = [ "serde", ] @@ -640,19 +721,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "env_logger" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" -dependencies = [ - "humantime", - "is-terminal", - "log", - "regex", - "termcolor", -] - [[package]] name = "equivalent" version = "1.0.1" @@ -696,7 +764,7 @@ dependencies = [ "flume", "half", "lebe", - "miniz_oxide", + "miniz_oxide 0.7.4", "rayon-core", "smallvec", "zune-inflate", @@ -728,15 +796,15 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "faststr" -version = "0.2.19" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f375fcf41ec4dac873a8028fba4210dbda5c86bba13d2d741e651b474f7c05a4" +checksum = "4dc21a7d5a45182c2bb5ae9471b93f10919c0744b54403e54a9e2329c26ed5a3" dependencies = [ "bytes", "serde", @@ -745,9 +813,9 @@ dependencies = [ [[package]] name = "fdeflate" -version = "0.3.4" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f9bfee30e4dedf0ab8b422f03af778d9612b63f502710fc500a334ebe2de645" +checksum = "d8090f921a24b04994d9929e204f50b498a33ea6ba559ffaa05e04f7ee7fb5ab" dependencies = [ "simd-adler32", ] @@ -771,25 +839,19 @@ version = "2.0.1+ffmpeg-7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4aa99eb55979d5c1db3b0b7a807a5e50dda07f5f6c2dbc6e9b50c205f611646" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "ffmpeg-sys-the-third", "libc", ] -[[package]] -name = "finl_unicode" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" - [[package]] name = "flate2" -version = "1.0.30" +version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" dependencies = [ "crc32fast", - "miniz_oxide", + "miniz_oxide 0.8.0", ] [[package]] @@ -800,7 +862,7 @@ checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" dependencies = [ "futures-core", "futures-sink", - "spin 0.9.8", + "spin", ] [[package]] @@ -835,9 +897,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -845,15 +907,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ "futures-core", "futures-task", @@ -873,38 +935,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-macro" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-core", "futures-io", @@ -950,9 +1012,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" @@ -962,9 +1024,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" dependencies = [ "atomic-waker", "bytes", @@ -972,7 +1034,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap", + "indexmap 2.6.0", "slab", "tokio", "tokio-util", @@ -989,6 +1051,12 @@ dependencies = [ "crunchy", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.13.2" @@ -1008,6 +1076,12 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashbrown" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" + [[package]] name = "hashlink" version = "0.8.4" @@ -1017,6 +1091,19 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "hdrhistogram" +version = "7.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "base64 0.21.7", + "byteorder", + "flate2", + "nom", + "num-traits", +] + [[package]] name = "heck" version = "0.4.1" @@ -1084,9 +1171,9 @@ dependencies = [ [[package]] name = "http-body" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", "http", @@ -1094,12 +1181,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", - "futures-core", + "futures-util", "http", "http-body", "pin-project-lite", @@ -1107,9 +1194,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" [[package]] name = "httpdate" @@ -1125,9 +1212,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.3.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" dependencies = [ "bytes", "futures-channel", @@ -1144,6 +1231,36 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +dependencies = [ + "futures-util", + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-tls" version = "0.6.0" @@ -1162,9 +1279,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.3" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa" +checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" dependencies = [ "bytes", "futures-channel", @@ -1175,16 +1292,15 @@ dependencies = [ "pin-project-lite", "socket2", "tokio", - "tower", "tower-service", "tracing", ] [[package]] name = "iana-time-zone" -version = "0.1.60" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1215,11 +1331,12 @@ dependencies = [ [[package]] name = "image" -version = "0.25.1" -source = "git+https://github.com/fintelia/image/?branch=upgrade-zune-jpeg#54ee15fae5f865f9806bda70c2118e9e572e7deb" +version = "0.25.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc144d44a31d753b02ce64093d532f55ff8dc4ebf2ffb8a63c0dda691385acae" dependencies = [ "bytemuck", - "byteorder", + "byteorder-lite", "color_quant", "dav1d", "dcv-color-primitives", @@ -1234,33 +1351,44 @@ dependencies = [ "rayon", "rgb", "tiff", + "zune-core", "zune-jpeg", ] [[package]] name = "image-webp" -version = "0.1.2" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d730b085583c4d789dfd07fdcf185be59501666a90c97c40162b37e4fdad272d" +checksum = "e031e8e3d94711a9ccb5d6ea357439ef3dcbed361798bd4071dc4d9793fbe22f" dependencies = [ "byteorder-lite", - "thiserror", + "quick-error", ] [[package]] name = "imgref" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44feda355f4159a7c757171a77de25daf6411e217b4cabd03bd6650690468126" +checksum = "d0263a3d970d5c054ed9312c0057b4f3bde9c0b33836d3637361d4a9e6e7a408" [[package]] name = "indexmap" -version = "2.2.6" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown 0.15.0", ] [[package]] @@ -1271,25 +1399,14 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] name = "ipnet" -version = "2.9.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" - -[[package]] -name = "is-terminal" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys 0.52.0", -] +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" [[package]] name = "itertools" @@ -1300,6 +1417,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -1308,9 +1434,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] @@ -1323,9 +1449,9 @@ checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -1347,17 +1473,17 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" dependencies = [ - "winapi 0.2.8", + "winapi", "winapi-build", ] [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" dependencies = [ - "spin 0.5.2", + "spin", ] [[package]] @@ -1374,9 +1500,9 @@ checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "libfuzzer-sys" @@ -1391,12 +1517,12 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -1407,9 +1533,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libmimalloc-sys" -version = "0.1.38" +version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7bb23d733dfcc8af652a78b7bf232f0e967710d044732185e561e47c0336b6" +checksum = "23aa6811d3bd4deb8a84dde645f943476d13b248d818edcf8ce0b2f37f036b44" dependencies = [ "cc", "libc", @@ -1444,9 +1570,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "loop9" @@ -1457,6 +1583,15 @@ dependencies = [ "imgref", ] +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "matchit" version = "0.7.3" @@ -1465,9 +1600,9 @@ checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" [[package]] name = "matrixmultiply" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" +checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" dependencies = [ "autocfg", "rawpointer", @@ -1480,7 +1615,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519" dependencies = [ "cfg-if", - "rayon", ] [[package]] @@ -1495,19 +1629,21 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "meme-search-engine" version = "0.1.0" dependencies = [ "anyhow", + "async-recursion", "axum", "base64 0.22.1", "chrono", "compact_str", + "console-subscriber", "faiss", "fastrand", "ffmpeg-the-third", @@ -1515,13 +1651,12 @@ dependencies = [ "futures-util", "half", "image", + "itertools 0.13.0", "json5", "lazy_static", - "log", "mimalloc", "ndarray", "num_cpus", - "pretty_env_logger", "prometheus", "regex", "reqwest", @@ -1533,8 +1668,9 @@ dependencies = [ "sqlx", "tokio", "tokio-stream", - "tower", + "tower 0.4.13", "tower-http", + "tracing", "url", "walkdir", "zstd", @@ -1542,9 +1678,9 @@ dependencies = [ [[package]] name = "mimalloc" -version = "0.1.42" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9186d86b79b52f4a77af65604b51225e8db1d6ee7e3f41aec1e40829c71a176" +checksum = "68914350ae34959d83f732418d51e2427a794055d0b9529f48259ac07af65633" dependencies = [ "libmimalloc-sys", ] @@ -1557,9 +1693,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mime_guess" -version = "2.0.4" +version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" dependencies = [ "mime", "unicase", @@ -1573,23 +1709,33 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" dependencies = [ "adler", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", "simd-adler32", ] [[package]] name = "mio" -version = "0.8.11" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ + "hermit-abi", "libc", "wasi", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -1617,11 +1763,10 @@ dependencies = [ [[package]] name = "native-tls" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" dependencies = [ - "lazy_static", "libc", "log", "openssl", @@ -1670,9 +1815,9 @@ checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8" [[package]] name = "num-bigint" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", @@ -1712,7 +1857,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] @@ -1768,26 +1913,26 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.36.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "openssl" -version = "0.10.64" +version = "0.10.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "cfg-if", "foreign-types", "libc", @@ -1804,7 +1949,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] @@ -1815,9 +1960,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.102" +version = "0.9.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" dependencies = [ "cc", "libc", @@ -1825,21 +1970,11 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "page_size" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" -dependencies = [ - "libc", - "winapi 0.3.9", -] - [[package]] name = "parking_lot" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", "parking_lot_core", @@ -1853,9 +1988,9 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.1", + "redox_syscall", "smallvec", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -1881,9 +2016,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.10" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "560131c633294438da9f7c4b08189194b20946c8274c6b9e38881a7874dc8ee8" +checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" dependencies = [ "memchr", "thiserror", @@ -1892,9 +2027,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.7.10" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26293c9193fbca7b1a3bf9b79dc1e388e927e6cacaa78b4a3ab705a1d3d41459" +checksum = "d214365f632b123a47fd913301e14c946c61d1c183ee245fa76eb752e59a02dd" dependencies = [ "pest", "pest_generator", @@ -1902,22 +2037,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.10" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ec22af7d3fb470a85dd2ca96b7c577a1eb4ef6f1683a9fe9a8c16e136c04687" +checksum = "eb55586734301717aea2ac313f50b2eb8f60d2fc3dc01d190eefa2e625f60c4e" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] name = "pest_meta" -version = "2.7.10" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7a240022f37c361ec1878d646fc5b7d7c4d28d5946e1a80ad5a7a4f4ca0bdcd" +checksum = "b75da2a70cf4d9cb76833c990ac9cd3923c9a8905a8929789ce347c84564d03d" dependencies = [ "once_cell", "pest", @@ -1926,22 +2061,22 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.5" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +checksum = "baf123a161dde1e524adf36f90bc5d8d3462824a9c43553ad07a8183161189ec" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.5" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +checksum = "a4502d8515ca9f32f1fb543d987f63d95a14934883db45bdb48060b6b69257f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] @@ -1979,65 +2114,58 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "png" -version = "0.17.13" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06e4b0d3d1312775e782c86c91a111aa1f910cbb65e1337f9975b5f9a554b5e1" +checksum = "52f9d46a34a05a6a57566bc2bfae066ef07585a6e3fa30fbbdff5936380623f0" dependencies = [ "bitflags 1.3.2", "crc32fast", "fdeflate", "flate2", - "miniz_oxide", + "miniz_oxide 0.8.0", ] [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "pretty_env_logger" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ - "env_logger", - "log", + "zerocopy", ] [[package]] name = "proc-macro2" -version = "1.0.83" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" dependencies = [ "unicode-ident", ] [[package]] name = "profiling" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d84d1d7a6ac92673717f9f6d1518374ef257669c24ebc5ac25d5033828be58" +checksum = "afbdc74edc00b6f6a218ca6a5364d6226a259d4b8ea1af4a0ea063f27e179f4d" dependencies = [ "profiling-procmacros", ] [[package]] name = "profiling-procmacros" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd" +checksum = "a65f2e60fbf1063868558d69c6beacf412dc755f9fc020f514b7955fc914fe30" dependencies = [ "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] @@ -2055,6 +2183,38 @@ dependencies = [ "thiserror", ] +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.79", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost", +] + [[package]] name = "protobuf" version = "2.28.0" @@ -2078,9 +2238,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -2130,7 +2290,7 @@ dependencies = [ "cc", "cfg-if", "interpolate_name", - "itertools", + "itertools 0.12.1", "libc", "libfuzzer-sys", "log", @@ -2154,16 +2314,15 @@ dependencies = [ [[package]] name = "ravif" -version = "0.11.5" +version = "0.11.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc13288f5ab39e6d7c9d501759712e6969fcc9734220846fc9ed26cae2cc4234" +checksum = "2413fd96bd0ea5cdeeb37eaf446a22e6ed7b981d792828721e74ded1980a45c6" dependencies = [ "avif-serialize", "imgref", "loop9", "quick-error", "rav1e", - "rayon", "rgb", ] @@ -2195,56 +2354,62 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.4.1" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" -dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", ] [[package]] name = "regex" -version = "1.10.4" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.8", + "regex-syntax 0.8.5", ] [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.5", ] [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.4" +version = "0.12.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" +checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" dependencies = [ "base64 0.22.1", "bytes", @@ -2256,6 +2421,7 @@ dependencies = [ "http-body", "http-body-util", "hyper", + "hyper-rustls", "hyper-tls", "hyper-util", "ipnet", @@ -2271,7 +2437,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper 0.1.2", + "sync_wrapper 1.0.1", "system-configuration", "tokio", "tokio-native-tls", @@ -2280,16 +2446,28 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "winreg", + "windows-registry", ] [[package]] name = "rgb" -version = "0.8.37" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8" +checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a" + +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" dependencies = [ - "bytemuck", + "cc", + "cfg-if", + "getrandom", + "libc", + "spin", + "untrusted", + "windows-sys 0.52.0", ] [[package]] @@ -2348,11 +2526,11 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", @@ -2360,26 +2538,49 @@ dependencies = [ ] [[package]] -name = "rustls-pemfile" -version = "2.1.2" +name = "rustls" +version = "0.23.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" +dependencies = [ + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" dependencies = [ - "base64 0.22.1", "rustls-pki-types", ] [[package]] name = "rustls-pki-types" -version = "1.7.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "ryu" @@ -2394,16 +2595,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d931a44fdaa43b8637009e7632a02adc4f2b2e0733c08caa4cf00e8da4a117a7" dependencies = [ "kernel32-sys", - "winapi 0.2.8", + "winapi", ] [[package]] name = "schannel" -version = "0.1.23" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2414,11 +2615,11 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "security-framework" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "core-foundation", "core-foundation-sys", "libc", @@ -2427,9 +2628,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" dependencies = [ "core-foundation-sys", "libc", @@ -2437,40 +2638,41 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.202" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_bytes" -version = "0.11.14" +version = "0.11.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b8497c313fd43ab992087548117643f6fcd935cbf36f176ffda0aacf9591734" +checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" dependencies = [ "serde", ] [[package]] name = "serde_derive" -version = "1.0.202" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.131" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "67d42a0bd4ac281beff598909bb56a86acaf979b84483e1c79c10dcaf98f8cf3" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -2487,9 +2689,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" dependencies = [ "serde", ] @@ -2528,6 +2730,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2570,9 +2781,9 @@ dependencies = [ [[package]] name = "simdutf8" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "slab" @@ -2601,17 +2812,15 @@ dependencies = [ [[package]] name = "sonic-rs" -version = "0.3.6" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "244d3cdf9dd4e2a5c63991a6ed4ecd768959204d5e4a839181ee997e8c149407" +checksum = "95e200c991ecfca73485d451fb0ae48c605f60ed7e27e4fcd5b8510f131a7c84" dependencies = [ - "arrayref", "bumpalo", "bytes", "cfg-if", "faststr", "itoa", - "page_size", "parking_lot", "ryu", "serde", @@ -2620,12 +2829,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.9.8" @@ -2647,11 +2850,10 @@ dependencies = [ [[package]] name = "sqlformat" -version = "0.2.3" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" +checksum = "7bba3a93db0cc4f7bdece8bb09e77e2e785c20bfebf79eb8340ed80708048790" dependencies = [ - "itertools", "nom", "unicode_categories", ] @@ -2690,7 +2892,7 @@ dependencies = [ "futures-util", "hashlink", "hex", - "indexmap", + "indexmap 2.6.0", "log", "memchr", "once_cell", @@ -2754,7 +2956,7 @@ checksum = "1ed31390216d20e538e447a7a9b959e06ed9fc51c37b514b46eb758016ecd418" dependencies = [ "atoi", "base64 0.21.7", - "bitflags 2.5.0", + "bitflags 2.6.0", "byteorder", "bytes", "crc", @@ -2796,7 +2998,7 @@ checksum = "7c824eb80b894f926f89a0b9da0c7f435d27cdd35b8c655b114e58223918577e" dependencies = [ "atoi", "base64 0.21.7", - "bitflags 2.5.0", + "bitflags 2.6.0", "byteorder", "crc", "dotenvy", @@ -2857,20 +3059,20 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "stringprep" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb41d74e231a107a1b4ee36bd1214b11285b77768d2e3824aedafa988fd36ee6" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" dependencies = [ - "finl_unicode", "unicode-bidi", "unicode-normalization", + "unicode-properties", ] [[package]] name = "subtle" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" @@ -2885,9 +3087,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.65" +version = "2.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" +checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" dependencies = [ "proc-macro2", "quote", @@ -2905,23 +3107,26 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "system-configuration" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", "core-foundation", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -2942,49 +3147,51 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.14" +version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.10.1" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" dependencies = [ "cfg-if", "fastrand", + "once_cell", "rustix", - "windows-sys 0.52.0", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", + "windows-sys 0.59.0", ] [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", ] [[package]] @@ -3000,9 +3207,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.6.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ "tinyvec_macros", ] @@ -3015,32 +3222,32 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.37.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "tracing", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.2.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] @@ -3054,10 +3261,21 @@ dependencies = [ ] [[package]] -name = "tokio-stream" -version = "0.1.15" +name = "tokio-rustls" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +dependencies = [ + "rustls", + "rustls-pki-types", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" dependencies = [ "futures-core", "pin-project-lite", @@ -3066,9 +3284,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", @@ -3079,9 +3297,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.13" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" dependencies = [ "serde", "serde_spanned", @@ -3091,26 +3309,56 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.13" +version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ - "indexmap", + "indexmap 2.6.0", "serde", "serde_spanned", "toml_datetime", "winnow", ] +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.22.1", + "bytes", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "socket2", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower" version = "0.4.13" @@ -3119,8 +3367,28 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", + "indexmap 1.9.3", "pin-project", "pin-project-lite", + "rand", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 0.1.2", "tokio", "tower-layer", "tower-service", @@ -3133,7 +3401,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "bytes", "http", "http-body", @@ -3145,15 +3413,15 @@ dependencies = [ [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -3175,7 +3443,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] @@ -3185,6 +3453,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "once_cell", + "regex", + "sharded-slab", + "thread_local", + "tracing", + "tracing-core", ] [[package]] @@ -3201,45 +3485,48 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "ucd-trie" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" [[package]] name = "unicase" -version = "2.7.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] +checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" [[package]] name = "unicode-bidi" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" +checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-normalization" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" dependencies = [ "tinyvec", ] [[package]] -name = "unicode-segmentation" -version = "1.11.0" +name = "unicode-properties" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode_categories" @@ -3248,10 +3535,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" [[package]] -name = "url" -version = "2.5.0" +name = "untrusted" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", "idna", @@ -3275,6 +3568,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" @@ -3289,9 +3588,9 @@ checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "walkdir" @@ -3301,7 +3600,7 @@ checksum = "bb08f9e670fab86099470b97cd2b252d6527f0b3cc1401acdb595ffc9dd288ff" dependencies = [ "kernel32-sys", "same-file", - "winapi 0.2.8", + "winapi", ] [[package]] @@ -3327,34 +3626,35 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ "cfg-if", "js-sys", @@ -3364,9 +3664,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3374,28 +3674,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", @@ -3409,11 +3709,11 @@ checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" [[package]] name = "whoami" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9" +checksum = "372d5b87f58ec45c384ba03563b03544dc5fadc3983e434b286913f5b4a9bb6d" dependencies = [ - "redox_syscall 0.4.1", + "redox_syscall", "wasite", ] @@ -3423,50 +3723,49 @@ version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - [[package]] name = "winapi-build" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" -dependencies = [ - "windows-sys 0.52.0", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", ] [[package]] @@ -3484,7 +3783,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -3504,18 +3812,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -3526,9 +3834,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -3538,9 +3846,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -3550,15 +3858,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -3568,9 +3876,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -3580,9 +3888,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -3592,9 +3900,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -3604,78 +3912,69 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.8" +version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c52e9c97a68071b23e836c9380edae937f17b9c4667bd021973efc689f618d" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "zerocopy" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ + "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.65", + "syn 2.0.79", ] [[package]] name = "zeroize" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" [[package]] name = "zstd" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "7.1.0" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", @@ -3683,9 +3982,9 @@ dependencies = [ [[package]] name = "zune-core" -version = "0.5.0-rc1" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d1b427373b52a2497c49b0860a5290daab6a0437902ffd8f607367bd5eb7d0" +checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" [[package]] name = "zune-inflate" @@ -3698,9 +3997,9 @@ dependencies = [ [[package]] name = "zune-jpeg" -version = "0.5.0-rc1" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4019a3b4e46db2d81faab716e5034dd212f1e105bc55b3f5ca4381dd78736e0" +checksum = "16099418600b4d8f028622f73ff6e3deaabdff330fb9a2a131dea781ee8b0768" dependencies = [ "zune-core", ] diff --git a/Cargo.toml b/Cargo.toml index 464cf8b..90cf606 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,14 +6,13 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -tokio = { version = "1", features = ["full"] } +tokio = { version = "1", features = ["full", "tracing"] } axum = "0.7" image = { version = "0.25", features = ["avif", "avif-native", "nasm"] } reqwest = { version = "0.12", features = ["multipart"] } serde = { version = "1", features = ["derive"] } sqlx = { version = "0.7", features = ["runtime-tokio", "sqlite"] } walkdir = "1" -log = "0.4" rmp-serde = "1" serde_json = "1" chrono = "0.4" @@ -24,7 +23,8 @@ faiss = "0.12" ndarray = "0.15" half = { version = "2" } regex = "1" -pretty_env_logger = "0.5" +tracing = "0.1" +console-subscriber = "0.4" futures-util = "0.3" tokio-stream = "0.1" num_cpus = "1" @@ -41,9 +41,8 @@ mimalloc = "0.1" sonic-rs = "0.3" ffmpeg-the-third = "2.0" compact_str = { version = "0.8.0-beta", features = ["serde"] } - -[patch.crates-io] -image = { git = "https://github.com/fintelia/image/", branch = "upgrade-zune-jpeg" } +itertools = "0.13" +async-recursion = "1" [[bin]] name = "reddit-dump" @@ -52,3 +51,7 @@ path = "src/reddit_dump.rs" [[bin]] name = "video-reader" path = "src/video_reader.rs" + +[[bin]] +name = "dump-processor" +path = "src/dump_processor.rs" diff --git a/src/common.rs b/src/common.rs index 7b207df..4a5f2c2 100644 --- a/src/common.rs +++ b/src/common.rs @@ -4,6 +4,7 @@ use image::{DynamicImage, imageops::FilterType, ImageFormat}; use anyhow::Result; use std::io::Cursor; use reqwest::Client; +use tracing::instrument; #[derive(Debug, Deserialize, Clone)] pub struct InferenceServerConfig { @@ -13,11 +14,13 @@ pub struct InferenceServerConfig { } pub fn resize_for_embed_sync + Send + 'static>(config: InferenceServerConfig, image: T) -> Result> { - let new = image.borrow().resize( + // the model currently in use wants aspect ratio 1:1 regardless of input + // I think this was previously being handled in the CLIP server but that is slightly lossy + let new = image.borrow().resize_exact( config.image_size.0, config.image_size.1, - FilterType::Lanczos3 - ); + FilterType::CatmullRom + ).into_rgb8(); let mut buf = Vec::new(); let mut csr = Cursor::new(&mut buf); new.write_to(&mut csr, ImageFormat::Png)?; @@ -46,13 +49,14 @@ pub async fn get_backend_config(clip_server: &str) -> InferenceServerConfig { match fetch_backend_config(&clip_server).await { Ok(backend) => break backend, Err(e) => { - log::error!("Backend failed (fetch): {}", e); + tracing::error!("Backend failed (fetch): {}", e); tokio::time::sleep(std::time::Duration::from_secs(1)).await; } } } } +#[instrument(skip(client, data))] pub async fn query_clip_server(client: &Client, base_url: &str, path: &str, data: I) -> Result where I: Serialize, O: serde::de::DeserializeOwned, { let response = client diff --git a/src/dump_processor.rs b/src/dump_processor.rs new file mode 100644 index 0000000..e716936 --- /dev/null +++ b/src/dump_processor.rs @@ -0,0 +1,53 @@ +use anyhow::{Result, Context}; +use serde::{Serialize, Deserialize}; +use std::io::BufReader; +use rmp_serde::decode::Error as DecodeError; +use std::fs; + +// TODO refactor +#[derive(Clone, Deserialize, Serialize, Debug, PartialEq)] +struct OriginalImageMetadata { + mime_type: String, + original_file_size: usize, + dimension: (u32, u32), + final_url: String +} + +#[derive(Clone, Deserialize, Serialize, Debug)] +struct ProcessedEntry { + url: String, + id: String, + title: String, + subreddit: String, + author: String, + timestamp: u64, + #[serde(with = "serde_bytes")] + embedding: Vec, + metadata: OriginalImageMetadata +} + +fn main() -> Result<()> { + let path = std::env::args().nth(1).context("missing path")?; + let stream = zstd::stream::Decoder::new(fs::File::open(path)?)?; + let mut stream = BufReader::new(stream); + let mut latest_timestamp = 0; + let mut count = 0; + loop { + let res: Result = rmp_serde::from_read(&mut stream); + if res.is_ok() { + count += 1; + } + match res { + Ok(x) => { + if x.timestamp > latest_timestamp { + println!("{} {} https://reddit.com/r/{}/comments/{}", x.timestamp, count, x.subreddit, x.id); + latest_timestamp = x.timestamp; + } + }, + Err(DecodeError::InvalidDataRead(x)) | Err(DecodeError::InvalidMarkerRead(x)) if x.kind() == std::io::ErrorKind::UnexpectedEof => break, + Err(e) => return Err(e).context("decode fail") + } + } + println!("{} {}", latest_timestamp, count); + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index cbd9826..8f378a2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -33,6 +33,7 @@ use faiss::index::scalar_quantizer; use lazy_static::lazy_static; use prometheus::{register_int_counter, register_int_counter_vec, register_int_gauge, Encoder, IntCounter, IntGauge, IntCounterVec}; use ndarray::ArrayBase; +use tracing::instrument; mod ocr; mod common; @@ -249,7 +250,7 @@ async fn initialize_database(config: &Config) -> Result { if (index as i32) < version { continue } - log::info!("Migrating to DB version {}", index); + tracing::info!("Migrating to DB version {}", index); sqlx::query(sql).execute(&mut *tx).await?; sqlx::query(&format!("PRAGMA user_version = {}", index + 1)).execute(&mut *tx).await?; } @@ -317,6 +318,7 @@ fn image_formats(_config: &Config) -> HashMap { formats } +#[instrument(skip_all)] async fn ensure_filename_record_exists(conn: &mut SqliteConnection, filename_enc: &Vec) -> Result<()> { sqlx::query!("INSERT OR IGNORE INTO files (filename) VALUES (?)", filename_enc) .execute(conn) @@ -324,6 +326,7 @@ async fn ensure_filename_record_exists(conn: &mut SqliteConnection, filename_enc Ok(()) } +#[instrument(skip_all)] async fn write_metadata(conn: &mut SqliteConnection, filename_enc: &Vec, metadata: FileMetadata) -> Result<()> { ensure_filename_record_exists(conn, filename_enc).await?; let metadata_serialized = rmp_serde::to_vec_named(&metadata)?; @@ -333,18 +336,276 @@ async fn write_metadata(conn: &mut SqliteConnection, filename_enc: &Vec, met Ok(()) } +#[instrument] +async fn handle_embedding_batch(client: reqwest::Client, config: Arc, pool: SqlitePool, batch: Vec, video_embed_times: Arc>>) -> Result<()> { + let result: Vec = query_clip_server( + &client, + &config.service.clip_server, + "", + EmbeddingRequest::Images { + images: batch.iter().map(|input| serde_bytes::ByteBuf::from(input.image.clone())).collect(), + }, + ).await.context("querying CLIP server")?; + + let mut tx = pool.begin().await?; + let ts = timestamp(); + for (i, vector) in result.into_iter().enumerate() { + let vector = vector.into_vec(); + tracing::debug!("embedded {:?}", batch[i].filename); + let encoded_filename = batch[i].filename.encode()?; + IMAGES_EMBEDDED_COUNTER.inc(); + ensure_filename_record_exists(&mut *tx, &encoded_filename).await?; + match &batch[i].filename { + Filename::VideoFrame(container, _) => { video_embed_times.write().await.insert(container.clone(), timestamp()); }, + _ => () + } + sqlx::query!( + "UPDATE files SET embedding_time = ?, embedding = ? WHERE filename = ?", + ts, + vector, + encoded_filename + ) + .execute(&mut *tx) + .await?; + } + tx.commit().await?; + anyhow::Result::Ok(()) +} + +#[instrument(skip(to_embed_tx, to_thumbnail_tx, to_ocr_tx, to_metadata_write_tx, video_meta))] +async fn load_image(record: FileRecord, to_embed_tx: mpsc::Sender, to_thumbnail_tx: mpsc::Sender, to_ocr_tx: mpsc::Sender, to_metadata_write_tx: mpsc::Sender<(Filename, FileMetadata)>, config: Arc, video_meta: Arc>>) -> Result<()> { + let path = Path::new(&config.service.files).join(&*record.filename); + let image: Result> = tokio::task::block_in_place(|| Ok(Arc::new(ImageReader::open(&path)?.with_guessed_format()?.decode()?))); + let image = match image { + Ok(image) => image, + Err(e) => { + tracing::warn!("Could not read {} as image: {}", record.filename, e); + let filename = record.filename.clone(); + IMAGES_LOADED_ERROR_COUNTER.inc(); + let meta = tokio::task::spawn_blocking(move || -> Result> { + let mut i: u32 = 0; + let mut last_metadata = None; + let callback = |frame: RgbImage| { + let frame: Arc = Arc::new(frame.into()); + let embed_buf = resize_for_embed_sync(config.backend.clone(), frame.clone())?; + let filename = Filename::VideoFrame(filename.clone(), i); + to_embed_tx.blocking_send(EmbeddingInput { + image: embed_buf, + filename: filename.clone() + })?; + let meta = FileMetadata { + height: frame.height(), + width: frame.width(), + frames: Some(i + 1) + }; + last_metadata = Some(meta.clone()); + to_metadata_write_tx.blocking_send((filename.clone(), meta))?; + if config.service.enable_thumbs { + to_thumbnail_tx.blocking_send(LoadedImage { + image: frame.clone(), + filename, + original_filesize: None, + fast_thumbnails_only: true + })?; + } + i += 1; + Ok(()) + }; + match video_reader::run(&path, callback, config.service.video_frame_interval) { + Ok(()) => { + VIDEOS_LOADED_COUNTER.inc(); + return anyhow::Result::Ok(last_metadata) + }, + Err(e) => { + tracing::error!("Could not read {} as video: {}", filename, e); + VIDEOS_LOADED_ERROR_COUNTER.inc(); + } + } + return anyhow::Result::Ok(last_metadata) + }).await??; + if let Some(meta) = meta { + video_meta.write().await.insert(record.filename, meta); + } + return Ok(()) + } + }; + let filename = Filename::Actual(record.filename); + if record.needs_metadata { + let metadata = FileMetadata { + width: image.width(), + height: image.height(), + frames: None + }; + to_metadata_write_tx.send((filename.clone(), metadata)).await?; + } + IMAGES_LOADED_COUNTER.inc(); + if record.needs_embed { + let resized = resize_for_embed(config.backend.clone(), image.clone()).await?; + + to_embed_tx.send(EmbeddingInput { image: resized, filename: filename.clone() }).await? + } + if record.needs_thumbnail { + to_thumbnail_tx + .send(LoadedImage { + image: image.clone(), + filename: filename.clone(), + original_filesize: Some(std::fs::metadata(&path)?.len() as usize), + fast_thumbnails_only: false + }) + .await?; + } + if record.needs_ocr { + to_ocr_tx + .send(LoadedImage { + image, + filename: filename.clone(), + original_filesize: None, + fast_thumbnails_only: true + }) + .await?; + } + Ok(()) +} + +#[instrument(skip(video_thumb_times, pool, formats))] +async fn generate_thumbnail(image: LoadedImage, config: Arc, video_thumb_times: Arc>>, pool: SqlitePool, formats: Arc>) -> Result<()> { + use image::codecs::*; + + let filename = image.filename.clone(); + tracing::debug!("thumbnailing {:?}", filename); + + let generated_formats = tokio::task::spawn_blocking(move || { + let mut generated_formats = Vec::new(); + let rgb = DynamicImage::from(image.image.to_rgb8()); + for (format_name, format_config) in &*formats { + if !format_config.is_fast && image.fast_thumbnails_only { continue } + let resized = if format_config.target_filesize != 0 { + let mut lb = 1; + let mut ub = 100; + loop { + let quality = (lb + ub) / 2; + let thumbnail = rgb.resize( + format_config.target_width.min(rgb.width()), + u32::MAX, + FilterType::Lanczos3, + ); + let mut buf: Vec = Vec::new(); + let mut csr = Cursor::new(&mut buf); + // this is ugly but I don't actually know how to fix it (cannot factor it out due to issues with dyn Trait) + match format_config.format { + ImageFormat::Avif => thumbnail.write_with_encoder(avif::AvifEncoder::new_with_speed_quality(&mut csr, 4, quality)), + ImageFormat::Jpeg => thumbnail.write_with_encoder(jpeg::JpegEncoder::new_with_quality(&mut csr, quality)), + _ => unimplemented!() + }?; + if buf.len() > format_config.target_filesize { + ub = quality; + } else { + lb = quality + 1; + } + if lb >= ub { + break buf; + } + } + } else { + let thumbnail = rgb.resize( + format_config.target_width.min(rgb.width()), + u32::MAX, + FilterType::Lanczos3, + ); + let mut buf: Vec = Vec::new(); + let mut csr = Cursor::new(&mut buf); + match format_config.format { + ImageFormat::Avif => thumbnail.write_with_encoder(avif::AvifEncoder::new_with_speed_quality(&mut csr, 4, format_config.quality)), + ImageFormat::Jpeg => thumbnail.write_with_encoder(jpeg::JpegEncoder::new_with_quality(&mut csr, format_config.quality)), + ImageFormat::WebP => thumbnail.write_with_encoder(webp::WebPEncoder::new_lossless(&mut csr)), + _ => unimplemented!() + }?; + buf + }; + if resized.len() < image.original_filesize.unwrap_or(usize::MAX) { + generated_formats.push(format_name.clone()); + let thumbnail_path = Path::new(&config.service.thumbs_path).join( + generate_thumbnail_filename( + &image.filename, + format_name, + format_config, + ), + ); + THUMBNAILS_GENERATED_COUNTER.get_metric_with_label_values(&[format_name]).unwrap().inc(); + std::fs::write(thumbnail_path, resized)?; + } + } + Ok::, anyhow::Error>(generated_formats) + }).await??; + + IMAGES_THUMBNAILED_COUNTER.inc(); + let formats_data = rmp_serde::to_vec(&generated_formats)?; + let ts = timestamp(); + let filename_enc = filename.encode()?; + let mut conn = pool.acquire().await?; + ensure_filename_record_exists(&mut conn, &filename_enc).await?; + match filename { + Filename::VideoFrame(container, _) => { video_thumb_times.write().await.insert(container.clone(), timestamp()); }, + _ => () + } + sqlx::query!( + "UPDATE files SET thumbnails = ?, thumbnail_time = ? WHERE filename = ?", + formats_data, + ts, + filename_enc + ) + .execute(&mut *conn) + .await?; + Ok(()) +} + +#[instrument] +async fn do_ocr(image: LoadedImage, config: Arc, client: Client, pool: SqlitePool) -> Result<()> { + tracing::debug!("OCRing {:?}", image.filename); + let scan = match scan_image(&client, &image.image).await { + Ok(scan) => scan, + Err(e) => { + IMAGES_OCRED_ERROR_COUNTER.inc(); + tracing::error!("OCR failure {:?}: {}", image.filename, e); + return Ok(()) + } + }; + IMAGES_OCRED_COUNTER.inc(); + let ocr_text = scan + .iter() + .map(|segment| segment.text.clone()) + .collect::>() + .join("\n"); + let ocr_data = rmp_serde::to_vec(&scan)?; + let ts = timestamp(); + let filename_enc = image.filename.encode()?; + let mut conn = pool.acquire().await?; + ensure_filename_record_exists(&mut conn, &filename_enc).await?; + sqlx::query!( + "UPDATE files SET ocr = ?, raw_ocr_segments = ?, ocr_time = ? WHERE filename = ?", + ocr_text, + ocr_data, + ts, + filename_enc + ) + .execute(&mut *conn) + .await?; + Ok(()) +} + +#[instrument] async fn ingest_files(config: Arc) -> Result<()> { let pool = initialize_database(&config.service).await?; let client = Client::new(); - + let formats = image_formats(&config.service); - + let (to_process_tx, to_process_rx) = mpsc::channel::(100); let (to_embed_tx, to_embed_rx) = mpsc::channel(config.backend.batch as usize); let (to_thumbnail_tx, to_thumbnail_rx) = mpsc::channel(30); let (to_ocr_tx, to_ocr_rx) = mpsc::channel(30); let (to_metadata_write_tx, mut to_metadata_write_rx) = mpsc::channel::<(Filename, FileMetadata)>(100); - + let cpus = num_cpus::get(); let video_meta = Arc::new(RwLock::new(HashMap::new())); @@ -363,102 +624,10 @@ async fn ingest_files(config: Arc) -> Result<()> { let to_ocr_tx = to_ocr_tx.clone(); let video_meta = video_meta.clone(); let to_metadata_write_tx = to_metadata_write_tx.clone(); - async move { - let path = Path::new(&config.service.files).join(&*record.filename); - let image: Result> = tokio::task::block_in_place(|| Ok(Arc::new(ImageReader::open(&path)?.with_guessed_format()?.decode()?))); - let image = match image { - Ok(image) => image, - Err(e) => { - log::warn!("Could not read {} as image: {}", record.filename, e); - let filename = record.filename.clone(); - IMAGES_LOADED_ERROR_COUNTER.inc(); - let meta = tokio::task::spawn_blocking(move || -> Result> { - let mut i: u32 = 0; - let mut last_metadata = None; - let callback = |frame: RgbImage| { - let frame: Arc = Arc::new(frame.into()); - let embed_buf = resize_for_embed_sync(config.backend.clone(), frame.clone())?; - let filename = Filename::VideoFrame(filename.clone(), i); - to_embed_tx.blocking_send(EmbeddingInput { - image: embed_buf, - filename: filename.clone() - })?; - let meta = FileMetadata { - height: frame.height(), - width: frame.width(), - frames: Some(i + 1) - }; - last_metadata = Some(meta.clone()); - to_metadata_write_tx.blocking_send((filename.clone(), meta))?; - if config.service.enable_thumbs { - to_thumbnail_tx.blocking_send(LoadedImage { - image: frame.clone(), - filename, - original_filesize: None, - fast_thumbnails_only: true - })?; - } - i += 1; - Ok(()) - }; - match video_reader::run(&path, callback, config.service.video_frame_interval) { - Ok(()) => { - VIDEOS_LOADED_COUNTER.inc(); - return anyhow::Result::Ok(last_metadata) - }, - Err(e) => { - log::error!("Could not read {} as video: {}", filename, e); - VIDEOS_LOADED_ERROR_COUNTER.inc(); - } - } - return anyhow::Result::Ok(last_metadata) - }).await??; - if let Some(meta) = meta { - video_meta.write().await.insert(record.filename, meta); - } - return Ok(()) - } - }; - let filename = Filename::Actual(record.filename); - if record.needs_metadata { - let metadata = FileMetadata { - width: image.width(), - height: image.height(), - frames: None - }; - to_metadata_write_tx.send((filename.clone(), metadata)).await?; - } - IMAGES_LOADED_COUNTER.inc(); - if record.needs_embed { - let resized = resize_for_embed(config.backend.clone(), image.clone()).await?; - - to_embed_tx.send(EmbeddingInput { image: resized, filename: filename.clone() }).await? - } - if record.needs_thumbnail { - to_thumbnail_tx - .send(LoadedImage { - image: image.clone(), - filename: filename.clone(), - original_filesize: Some(std::fs::metadata(&path)?.len() as usize), - fast_thumbnails_only: false - }) - .await?; - } - if record.needs_ocr { - to_ocr_tx - .send(LoadedImage { - image, - filename: filename.clone(), - original_filesize: None, - fast_thumbnails_only: true - }) - .await?; - } - Ok(()) - } + load_image(record, to_embed_tx, to_thumbnail_tx, to_ocr_tx, to_metadata_write_tx, config, video_meta) }) }); - + let metadata_writer: JoinHandle> = tokio::spawn({ let pool = pool.clone(); async move { @@ -468,7 +637,7 @@ async fn ingest_files(config: Arc) -> Result<()> { Ok(()) } }); - + let thumbnail_generation: Option>> = if config.service.enable_thumbs { let config = config.clone(); let pool = pool.clone(); @@ -477,145 +646,29 @@ async fn ingest_files(config: Arc) -> Result<()> { let video_thumb_times = video_thumb_times.clone(); Some(tokio::spawn({ stream.try_for_each_concurrent(Some(cpus), move |image| { - use image::codecs::*; - let formats = formats.clone(); let config = config.clone(); let pool = pool.clone(); let video_thumb_times = video_thumb_times.clone(); - async move { - let filename = image.filename.clone(); - log::debug!("thumbnailing {:?}", filename); - let generated_formats = tokio::task::spawn_blocking(move || { - let mut generated_formats = Vec::new(); - let rgb = DynamicImage::from(image.image.to_rgb8()); - for (format_name, format_config) in &*formats { - if !format_config.is_fast && image.fast_thumbnails_only { continue } - let resized = if format_config.target_filesize != 0 { - let mut lb = 1; - let mut ub = 100; - loop { - let quality = (lb + ub) / 2; - let thumbnail = rgb.resize( - format_config.target_width.min(rgb.width()), - u32::MAX, - FilterType::Lanczos3, - ); - let mut buf: Vec = Vec::new(); - let mut csr = Cursor::new(&mut buf); - // this is ugly but I don't actually know how to fix it (cannot factor it out due to issues with dyn Trait) - match format_config.format { - ImageFormat::Avif => thumbnail.write_with_encoder(avif::AvifEncoder::new_with_speed_quality(&mut csr, 4, quality)), - ImageFormat::Jpeg => thumbnail.write_with_encoder(jpeg::JpegEncoder::new_with_quality(&mut csr, quality)), - _ => unimplemented!() - }?; - if buf.len() > format_config.target_filesize { - ub = quality; - } else { - lb = quality + 1; - } - if lb >= ub { - break buf; - } - } - } else { - let thumbnail = rgb.resize( - format_config.target_width.min(rgb.width()), - u32::MAX, - FilterType::Lanczos3, - ); - let mut buf: Vec = Vec::new(); - let mut csr = Cursor::new(&mut buf); - match format_config.format { - ImageFormat::Avif => thumbnail.write_with_encoder(avif::AvifEncoder::new_with_speed_quality(&mut csr, 4, format_config.quality)), - ImageFormat::Jpeg => thumbnail.write_with_encoder(jpeg::JpegEncoder::new_with_quality(&mut csr, format_config.quality)), - ImageFormat::WebP => thumbnail.write_with_encoder(webp::WebPEncoder::new_lossless(&mut csr)), - _ => unimplemented!() - }?; - buf - }; - if resized.len() < image.original_filesize.unwrap_or(usize::MAX) { - generated_formats.push(format_name.clone()); - let thumbnail_path = Path::new(&config.service.thumbs_path).join( - generate_thumbnail_filename( - &image.filename, - format_name, - format_config, - ), - ); - THUMBNAILS_GENERATED_COUNTER.get_metric_with_label_values(&[format_name]).unwrap().inc(); - std::fs::write(thumbnail_path, resized)?; - } - } - Ok::, anyhow::Error>(generated_formats) - }).await??; - IMAGES_THUMBNAILED_COUNTER.inc(); - let formats_data = rmp_serde::to_vec(&generated_formats)?; - let ts = timestamp(); - let filename_enc = filename.encode()?; - let mut conn = pool.acquire().await?; - ensure_filename_record_exists(&mut conn, &filename_enc).await?; - match filename { - Filename::VideoFrame(container, _) => { video_thumb_times.write().await.insert(container.clone(), timestamp()); }, - _ => () - } - sqlx::query!( - "UPDATE files SET thumbnails = ?, thumbnail_time = ? WHERE filename = ?", - formats_data, - ts, - filename_enc - ) - .execute(&mut *conn) - .await?; - Ok(()) - } + generate_thumbnail(image, config, video_thumb_times, pool, formats) }) })) } else { None }; - + // TODO: save OCR errors and don't retry let ocr: Option>> = if config.service.enable_ocr { let client = client.clone(); let pool = pool.clone(); + let config = config.clone(); let stream = ReceiverStream::new(to_ocr_rx).map(Ok); Some(tokio::spawn({ stream.try_for_each_concurrent(Some(config.service.ocr_concurrency), move |image| { let client = client.clone(); let pool = pool.clone(); - async move { - log::debug!("OCRing {:?}", image.filename); - let scan = match scan_image(&client, &image.image).await { - Ok(scan) => scan, - Err(e) => { - IMAGES_OCRED_ERROR_COUNTER.inc(); - log::error!("OCR failure {:?}: {}", image.filename, e); - return Ok(()) - } - }; - IMAGES_OCRED_COUNTER.inc(); - let ocr_text = scan - .iter() - .map(|segment| segment.text.clone()) - .collect::>() - .join("\n"); - let ocr_data = rmp_serde::to_vec(&scan)?; - let ts = timestamp(); - let filename_enc = image.filename.encode()?; - let mut conn = pool.acquire().await?; - ensure_filename_record_exists(&mut conn, &filename_enc).await?; - sqlx::query!( - "UPDATE files SET ocr = ?, raw_ocr_segments = ?, ocr_time = ? WHERE filename = ?", - ocr_text, - ocr_data, - ts, - filename_enc - ) - .execute(&mut *conn) - .await?; - Ok(()) - } + let config = config.clone(); + do_ocr(image, config, client, pool) }) })) } else { @@ -634,45 +687,12 @@ async fn ingest_files(config: Arc) -> Result<()> { let config = config.clone(); let pool = pool.clone(); let video_embed_times = video_embed_times.clone(); - async move { - let result: Vec = query_clip_server( - &client, - &config.service.clip_server, - "", - EmbeddingRequest::Images { - images: batch.iter().map(|input| serde_bytes::ByteBuf::from(input.image.clone())).collect(), - }, - ).await.context("querying CLIP server")?; - - let mut tx = pool.begin().await?; - let ts = timestamp(); - for (i, vector) in result.into_iter().enumerate() { - let vector = vector.into_vec(); - log::debug!("embedded {:?}", batch[i].filename); - let encoded_filename = batch[i].filename.encode()?; - IMAGES_EMBEDDED_COUNTER.inc(); - ensure_filename_record_exists(&mut *tx, &encoded_filename).await?; - match &batch[i].filename { - Filename::VideoFrame(container, _) => { video_embed_times.write().await.insert(container.clone(), timestamp()); }, - _ => () - } - sqlx::query!( - "UPDATE files SET embedding_time = ?, embedding = ? WHERE filename = ?", - ts, - vector, - encoded_filename - ) - .execute(&mut *tx) - .await?; - } - tx.commit().await?; - anyhow::Result::Ok(()) - } + handle_embedding_batch(client, config, pool, batch, video_embed_times) }) }); - + let mut actual_filenames = HashMap::new(); - + // blocking OS calls tokio::task::block_in_place(|| -> anyhow::Result<()> { for entry in WalkDir::new(config.service.files.as_str()) { @@ -688,7 +708,7 @@ async fn ingest_files(config: Arc) -> Result<()> { Ok(()) })?; - log::debug!("finished reading filenames"); + tracing::debug!("finished reading filenames"); for (filename, (_path, modtime)) in actual_filenames.iter() { let modtime = *modtime; @@ -721,7 +741,7 @@ async fn ingest_files(config: Arc) -> Result<()> { } }; if let Some(record) = new_record { - log::debug!("processing {}", record.filename); + tracing::debug!("processing {}", record.filename); // we need to exit here to actually capture the error if !to_process_tx.send(record).await.is_ok() { break @@ -730,20 +750,20 @@ async fn ingest_files(config: Arc) -> Result<()> { } drop(to_process_tx); - + embedding_generation.await?.context("generating embeddings")?; metadata_writer.await?.context("writing metadata")?; - + if let Some(thumbnail_generation) = thumbnail_generation { thumbnail_generation.await?.context("generating thumbnails")?; } - + if let Some(ocr) = ocr { ocr.await?.context("OCRing")?; } image_loading.await?.context("loading images")?; - + let stored: Vec> = sqlx::query_scalar("SELECT filename FROM files").fetch_all(&pool).await?; let mut tx = pool.begin().await?; let video_meta = video_meta.read().await; @@ -785,13 +805,14 @@ async fn ingest_files(config: Arc) -> Result<()> { tx.commit().await?; - log::info!("Ingest done"); - + tracing::info!("Ingest done"); + Result::Ok(()) } const INDEX_ADD_BATCH: usize = 512; +#[instrument] async fn build_index(config: Arc) -> Result { let pool = initialize_database(&config.service).await?; @@ -846,7 +867,7 @@ async fn build_index(config: Arc) -> Result { } else { index.metadata.push(None); } - + for format_string in &formats { let mut found = false; for (i, name) in index.format_names.iter().enumerate() { @@ -904,6 +925,7 @@ struct QueryRequest { include_video: bool } +#[instrument(skip(index))] async fn query_index(index: &IIndex, query: EmbeddingVector, k: usize, video: bool) -> Result { let result = index.vectors.search(&query, k as usize)?; @@ -940,6 +962,7 @@ async fn query_index(index: &IIndex, query: EmbeddingVector, k: usize, video: bo }) } +#[instrument(skip(config, client, index))] async fn handle_request(config: Arc, client: Arc, index: &IIndex, req: Json) -> Result> { let mut total_embedding = ndarray::Array::from(vec![0.0; config.backend.embedding_size]); @@ -973,8 +996,8 @@ async fn handle_request(config: Arc, client: Arc, index: &IInde total_embedding = total_embedding + embedding * term.weight.unwrap_or(1.0); } } - - let mut batches = vec![]; + + let mut batches = vec![]; if !image_batch.is_empty() { batches.push( @@ -1016,12 +1039,13 @@ async fn handle_request(config: Arc, client: Arc, index: &IInde #[derive(Serialize, Deserialize)] struct FrontendInit { n_total: u64, - predefined_embedding_names: Vec + predefined_embedding_names: Vec, + d_emb: usize } #[tokio::main] async fn main() -> Result<()> { - pretty_env_logger::init(); + console_subscriber::init(); let config_path = std::env::args().nth(1).expect("Missing config file path"); let config: Config = serde_json::from_slice(&std::fs::read(config_path)?)?; @@ -1062,23 +1086,23 @@ async fn main() -> Result<()> { let index = index.clone(); async move { loop { - log::info!("Ingest running"); + tracing::info!("Ingest running"); match ingest_files(config.clone()).await { Ok(_) => { match build_index(config.clone()).await { Ok(new_index) => { LAST_INDEX_SIZE.set(new_index.vectors.ntotal() as i64); *index.write().await = new_index; - log::info!("Index loaded"); + tracing::info!("Index loaded"); } Err(e) => { - log::error!("Index build failed: {:?}", e); + tracing::error!("Index build failed: {:?}", e); ingest_done_tx.send((false, format!("{:?}", e))).unwrap(); } } } Err(e) => { - log::error!("Ingest failed: {:?}", e); + tracing::error!("Ingest failed: {:?}", e); ingest_done_tx.send((false, format!("{:?}", e))).unwrap(); } } @@ -1106,11 +1130,12 @@ async fn main() -> Result<()> { .route("/", get(|_req: ()| async move { Json(FrontendInit { n_total: index_.read().await.vectors.ntotal(), - predefined_embedding_names: config__.predefined_embeddings.keys().cloned().collect() + predefined_embedding_names: config__.predefined_embeddings.keys().cloned().collect(), + d_emb: config__.backend.embedding_size }) })) .route("/reload", post(|_req: ()| async move { - log::info!("Requesting index reload"); + tracing::info!("Requesting index reload"); let mut done_rx = done_tx.clone().subscribe(); let _ = request_ingest_tx.send(()).await; // ignore possible error, which is presumably because the queue is full match done_rx.recv().await { @@ -1141,9 +1166,9 @@ async fn main() -> Result<()> { .layer(cors); let addr = format!("0.0.0.0:{}", config_.service.port); - log::info!("Starting server on {}", addr); + tracing::info!("Starting server on {}", addr); let listener = tokio::net::TcpListener::bind(&addr).await.unwrap(); axum::serve(listener, app).await?; Ok(()) -} \ No newline at end of file +} diff --git a/src/ocr.rs b/src/ocr.rs index 90e523e..46e9d46 100644 --- a/src/ocr.rs +++ b/src/ocr.rs @@ -9,6 +9,7 @@ use reqwest::{ use serde_json::Value; use std::{io::Cursor, time::{SystemTime, UNIX_EPOCH}}; use serde::{Deserialize, Serialize}; +use tracing::instrument; const CALLBACK_REGEX: &str = r">AF_initDataCallback\((\{key: 'ds:1'.*?\})\);"; const MAX_DIM: u32 = 1024; @@ -45,6 +46,7 @@ fn rationalize_coords_format1( } } +#[instrument(skip(client, image))] async fn scan_image_chunk( client: &Client, image: &[u8], @@ -130,13 +132,14 @@ async fn scan_image_chunk( .collect()) } +#[instrument(skip(client))] pub async fn scan_image(client: &Client, image: &DynamicImage) -> Result { let mut result = ScanResult::new(); let (width, height) = image.dimensions(); let (width, height, image) = if width > MAX_DIM { let height = ((height as f64) * (MAX_DIM as f64) / (width as f64)).round() as u32; - let new_image = tokio::task::block_in_place(|| image.resize_exact(MAX_DIM, height, image::imageops::FilterType::Lanczos3)); + let new_image = tokio::task::block_in_place(|| image.resize_exact(MAX_DIM, height, image::imageops::FilterType::CatmullRom)); (MAX_DIM, height, std::borrow::Cow::Owned(new_image)) } else { (width, height, std::borrow::Cow::Borrowed(image)) @@ -170,4 +173,4 @@ pub async fn scan_image(client: &Client, image: &DynamicImage) -> Result + #[serde(with = "serde_bytes")] + embedding: Vec, + metadata: OriginalImageMetadata } lazy_static! { + // we do exclude galleries doing this but there don't seem to be any in the dataset static ref URL_IGNORE: RegexSet = RegexSet::new([ r"//reddit\.com", r"\.html?", @@ -69,16 +83,39 @@ lazy_static! { r"\?articleid=", r"\.aspx?", r"\.xml", - r"//youtube\.com", r"/rss/", r"//vimeo\.com", - r"//www\.youtube\.com", - r"//youtu\.be", r"//www\.reddit\.com", + r"//v\.redd\.it", + r"\.gifv$", + r"youtube\.com/user/" // TODO fill in more things, maybe try and collect thumbnails or something ]).unwrap(); - static ref ACCEPTABLE_FILETYPES: HashSet<&'static [u8]> = ["image/png", "image/webp", "image/avif", "image/jpeg", "image/gif", "image/webp", "image/apng", "image/bmp", "image/tiff"] - .into_iter().map(str::as_bytes).collect(); + static ref URL_MUST_CONTAIN: RegexSet = RegexSetBuilder::new([ + "jpg", + "jpeg", + "png", + "webp", + r"\.gif", + "=gif", + "jpeg", + "bmp", + "tiff", + "avif", + "webp", + "imgur", + "image", + r"//i\.", + "img", + r"cdn\.", + r"media\.", + "/i/", + "/media", + r"youtu\.be", + r"youtube\.com", + ]).case_insensitive(true).build().unwrap(); + static ref ACCEPTABLE_FILETYPES: HashSet<&'static str> = ["image/png", "image/webp", "image/avif", "image/jpeg", "image/gif", "image/webp", "image/apng", "image/bmp", "image/tiff"] + .into_iter().collect(); static ref OBJECT_HACKY_IGNORE: bytes::RegexSet = bytes::RegexSet::new([ r#""author":"\[deleted\]""#, r#""promoted":true"#, // these seem to be ads which are in the data for some reason, and lack some important fields @@ -86,11 +123,34 @@ lazy_static! { r"\x00" // for SOME REASON one of the JSON files contains a lot of null bytes before one particular record, so just ignore that record ]).unwrap(); static ref URL_REPLACEMENT_RULES: Vec<(Regex, &'static str)> = [ - (r"//imgur.com/([A-Za-z0-9]+)", r"//i.imgur.com/$1.jpg"), - (r"^http://", r"https://") + (r"imgur\.com/([A-Za-z0-9]+),", r"imgur.com/$1"), + (r"//imgur\.com/([A-Za-z0-9]+)$", r"//i.imgur.com/$1.jpg"), + (r"//www\.imgur\.com/([A-Za-z0-9]+)$", r"//i.imgur.com/$1.jpg"), + (r"//m\.imgur\.com/([A-Za-z0-9]+)$", r"//i.imgur.com/$1.jpg"), + (r"^http://", r"https://"), + (r"//youtu\.be/(.*)", r"//youtube.com/watch?v=$1"), + (r"//[a-z]+\.youtube\.com/(.*)", r"//youtube.com/$1"), + (r"//www.youtube.com/attribution_link?.*v%3D([A-Za-z0-9_-]+).*", r"//i.ytimg.com/vi/$1/maxresdefault.jpg"), // redirect to youtube thumbnail API + (r"//youtube.com/embed/([A-Za-z0-9_-]+)", r"//i.ytimg.com/vi/$1/maxresdefault.jpg"), + (r"//youtube\.com/(?:.*)v=([A-Za-z0-9_-]+)(?:.*)", r"//i.ytimg.com/vi/$1/maxresdefault.jpg"), + (r"&", "&") // this is such an intensely cursed feature of the dumps ].into_iter().map(|(r, e)| (Regex::new(r).unwrap(), e)).collect(); + + static ref HTML_EXTRACTION_RULES: Vec<(Regex, Regex)> = [ + (r"//imgur\.com/a/[A-Za-z0-9]+", r#""#), + (r"//imgur\.com/gallery/[A-Za-z0-9]+", r#""#), + ].into_iter().map(|(r, e)| (Regex::new(r).unwrap(), Regex::new(e).unwrap())).collect(); + + static ref IMAGES_FETCHED_COUNTER: IntCounter = register_int_counter!("mse_scrape_images_fetched", "images fetched").unwrap(); + static ref IMAGES_PROCESSED_COUNTER: IntCounter = register_int_counter!("mse_scrape_images_processed", "images processed").unwrap(); + static ref ENTRIES_PROCESSED_COUNTER: IntCounter = register_int_counter!("mse_scrape_entries_processed", "entries processed").unwrap(); + static ref IMAGES_FAILED_COUNTER: IntCounter = register_int_counter!("mse_scrape_images_failed", "images failed").unwrap(); + static ref IMAGE_FILESIZES_HISTOGRAM: HistogramVec = register_histogram_vec!("mse_scrape_image_filesizes", "filesizes of successfully fetched images", &["format"], prometheus::exponential_buckets(100.0, 1.5, 29).unwrap()).unwrap(); + static ref IMAGE_PIXELS_HISTOGRAM: HistogramVec = register_histogram_vec!("mse_scrape_image_pixels", "pixel count of successfully fetched images", &["format"], prometheus::exponential_buckets(100.0, 1.3, 53).unwrap()).unwrap(); + static ref HTML_EXTRACTS_COUNTER: IntCounter = register_int_counter!("mse_scrape_html_extracts", "html extraction operations").unwrap(); } +#[instrument(skip(tx))] fn process_file(path: PathBuf, tx: mpsc::Sender, timestamp_threshold: Option) -> Result<()> { let mut stream = zstd::stream::Decoder::new(fs::File::open(path)?)?; stream.window_log_max(31)?; @@ -105,15 +165,16 @@ fn process_file(path: PathBuf, tx: mpsc::Sender, timestamp_threshold: Opt buf.clear(); continue; } + ENTRIES_PROCESSED_COUNTER.inc(); let entry = match sonic_rs::serde::from_slice::(buf.as_slice()) { Ok(x) => x, Err(e) => { - log::warn!("parse failed, please validate {:?} {:?}", e, String::from_utf8_lossy(&buf)); + tracing::warn!("parse failed, please validate {:?} {:?}", e, String::from_utf8_lossy(&buf)); return Ok(()) } }; if entry.selftext.is_empty() && !entry.over_18 && entry.author.is_some() && entry.subreddit.is_some() { - if !URL_IGNORE.is_match(&entry.url) { + if !URL_IGNORE.is_match(&entry.url) && URL_MUST_CONTAIN.is_match(&entry.url) { match &entry.post_hint { Some(x) if x == "na" || x == "image" => { // Technically this is slightly wrong because we reorder images slightly, but as long as it is not restarted all the time this is "fine". @@ -127,7 +188,7 @@ fn process_file(path: PathBuf, tx: mpsc::Sender, timestamp_threshold: Opt }, None => true }; - + if after_threshold { tx.blocking_send(entry)?; } }, _ => () @@ -139,23 +200,38 @@ fn process_file(path: PathBuf, tx: mpsc::Sender, timestamp_threshold: Opt Ok(()) } +#[derive(Debug)] struct Config { max_content_length: usize, input: String, output: String, backend: String, mode: OperatingMode, - filename_threshold: Option + filename_threshold: Option, + metrics_addr: String, + contact_info: String } -async fn fetch_file(client: reqwest::Client, config: Arc, url: &str) -> Result> { - // inelegant but I can't get it to work using Cows +#[instrument(skip(client, config))] +#[async_recursion::async_recursion] +async fn fetch_file(client: reqwest::Client, config: Arc, url: &str) -> Result<(Vec, String, String)> { let mut url = url.to_string(); for (regex, replacement) in URL_REPLACEMENT_RULES.iter() { url = regex.replace(&url, *replacement).to_string(); } + + let mut html_extract_rule = None; + + for (url_rule, extract_rule) in HTML_EXTRACTION_RULES.iter() { + if url_rule.is_match(&url) { + html_extract_rule = Some(extract_rule); + break; + } + } + let mut response = client.get(&*url).send().await?; - if !ACCEPTABLE_FILETYPES.contains(response.headers().get(reqwest::header::CONTENT_TYPE).context("no contept type")?.as_bytes()) { + let content_type = std::str::from_utf8(&response.headers().get(reqwest::header::CONTENT_TYPE).context("no content type")?.as_bytes())?.to_owned(); + if !(ACCEPTABLE_FILETYPES.contains(&content_type[..]) || (html_extract_rule.is_some() && content_type == "text/html")) { return Err(anyhow!("invalid Content-Type")); } match response.content_length() { @@ -169,11 +245,24 @@ async fn fetch_file(client: reqwest::Client, config: Arc, url: &str) -> return Err(anyhow!("response too large")); } } - Ok(buffer) + if let Some(extract_rule) = html_extract_rule { + if content_type == "text/html" { + let buffer = String::from_utf8_lossy(&buffer).to_string(); + if let Some(mat) = extract_rule.captures(&buffer) { + let new_url = mat.get(1).unwrap().as_str(); + HTML_EXTRACTS_COUNTER.inc(); + tracing::debug!("found new URL: {}", new_url); + return fetch_file(client, config, new_url).await; + } else { + return Err(anyhow!("no extraction match")); + } + } + } + Ok((buffer, content_type, response.url().to_string())) } fn write_output(config: Arc, mut rx: Receiver) -> Result<()> { - let mut out = fs::File::options().append(true).open(&config.output)?; + let mut out = fs::File::options().create(true).append(true).open(&config.output)?; let stream = zstd::Encoder::new(&mut out, 15)?.auto_finish(); let mut buf_stream = BufWriter::new(stream); while let Some(x) = rx.blocking_recv() { @@ -182,12 +271,14 @@ fn write_output(config: Arc, mut rx: Receiver) -> Result Ok(()) } +#[derive(Debug)] enum OperatingMode { Count, Sample(f32), FullRun } +#[instrument] fn readback_output(path: &str) -> Result<(u64, usize)> { use rmp_serde::decode::Error; let stream = zstd::stream::Decoder::new(fs::File::open(path)?)?; @@ -208,27 +299,47 @@ fn readback_output(path: &str) -> Result<(u64, usize)> { Ok((latest_timestamp, count)) } +async fn serve_metrics(config: Arc) -> Result<()> { + let metrics = axum::Router::new().route("/metrics", axum::routing::get(|| async move { + let mut buffer = Vec::new(); + let encoder = prometheus::TextEncoder::new(); + let metric_families = prometheus::gather(); + encoder.encode(&metric_families, &mut buffer).unwrap(); + buffer + })); + let listener = tokio::net::TcpListener::bind(&config.metrics_addr).await?; + tokio::task::spawn(async move { + let _ = axum::serve(listener, metrics).await; + }); + Ok(()) +} + #[tokio::main] async fn main() -> Result<()> { - pretty_env_logger::init(); + console_subscriber::init(); + let cpus = num_cpus::get(); let config = Arc::new(Config { - max_content_length: 1<<23, - input: String::from("./submissions"), + max_content_length: 1<<24, + input: String::from("./reddit_subs_202212/"), output: String::from("./sample.zst"), backend: String::from("http://localhost:1708"), - mode: OperatingMode::Sample(0.004), - filename_threshold: None + mode: OperatingMode::FullRun, + filename_threshold: None, + metrics_addr: String::from("0.0.0.0:9914"), + contact_info: String::from("scraping-ops@osmarks.net") }); + serve_metrics(config.clone()).await?; + let timestamp_threshold = match config.mode { OperatingMode::Count => None, _ => { match readback_output(&config.output) { Ok(x) => Some(x), Err(e) => { - log::warn!("could not read output: {}", e); + tracing::warn!("could not read output: {}", e); None } } @@ -237,19 +348,19 @@ async fn main() -> Result<()> { if let Some((threshold, count)) = timestamp_threshold { - log::info!("threshold is {}, {} items", threshold, count); + tracing::info!("threshold is {}, {} items", threshold, count); } - + let backend = get_backend_config(&config.backend).await; - log::info!("connected to inference server"); + tracing::info!("connected to inference server"); let (entries_tx, mut entries_rx) = mpsc::channel::(32768); let (buffers_tx, buffers_rx) = mpsc::channel(128); let (resized_tx, resized_rx) = mpsc::channel(backend.batch); let (final_write_tx, final_write_rx) = mpsc::channel::(32768); let client = Client::builder() - .user_agent(concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"))) + .user_agent(format!("{}/{} (contact {})", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"), config.contact_info)) .timeout(Duration::from_secs(30)) .build()?; @@ -278,11 +389,13 @@ async fn main() -> Result<()> { } match fetch_file(client, config.clone(), &entry.url).await { Ok(buf) => { - log::debug!("got {}", &entry.url); + IMAGES_FETCHED_COUNTER.inc(); + tracing::debug!("got {}", &entry.url); buffers_tx.send((entry, buf)).await?; }, Err(e) => { - log::warn!("{} failed: {}", &entry.url, e) + IMAGES_FAILED_COUNTER.inc(); + tracing::debug!("{} failed: {}", &entry.url, e) } } Ok(()) @@ -296,8 +409,10 @@ async fn main() -> Result<()> { _ => Some(tokio::task::spawn({ let stream = ReceiverStream::new(buffers_rx); let backend = backend.clone(); - stream.map(Ok).try_for_each_concurrent(Some(cpus), move |(entry, buffer)| { + stream.map(Ok).try_for_each_concurrent(Some(cpus), move |(entry, (buffer, mime_type, final_url))| { let backend = backend.clone(); + let size = buffer.len(); + IMAGE_FILESIZES_HISTOGRAM.with_label_values(&[&mime_type]).observe(size as f64); let resized_tx = resized_tx.clone(); async move { let image_result = tokio::task::spawn_blocking(|| { @@ -308,12 +423,20 @@ async fn main() -> Result<()> { let image = match image_result { Ok(image) => image, Err(e) => { - log::warn!("loading {} failed: {}", entry.url, e); + tracing::debug!("loading {} failed: {}", entry.url, e); return Result::<(), anyhow::Error>::Ok(()); } }; + let dim = (image.width(), image.height()); + IMAGE_PIXELS_HISTOGRAM.with_label_values(&[&mime_type]).observe(dim.0 as f64 * dim.1 as f64); + let metadata = OriginalImageMetadata { + mime_type, + original_file_size: size, + dimension: dim, + final_url + }; let resized = resize_for_embed(backend.clone(), image).await?; - resized_tx.send((entry, resized)).await?; + resized_tx.send((entry, resized, metadata)).await?; Ok(()) } }) @@ -328,7 +451,7 @@ async fn main() -> Result<()> { let config = config.clone(); // keep multiple embedding requests in flight stream.map(Ok).try_for_each_concurrent(Some(3), move |batch| { - let (entries, bytes): (Vec, Vec>) = batch.into_iter().unzip(); + let (entries, bytes, batch_dimensions): (Vec, Vec>, Vec) = batch.into_iter().multiunzip(); let client = client.clone(); let config = config.clone(); let final_write_tx = final_write_tx.clone(); @@ -341,17 +464,20 @@ async fn main() -> Result<()> { images: bytes.into_iter().map(serde_bytes::ByteBuf::from).collect(), }, ).await.context("querying CLIP server")?; - - for (vector, entry) in result.into_iter().zip(entries) { + + for (vector, entry, + metadata) in itertools::izip!(result.into_iter(), entries, batch_dimensions) { final_write_tx.send(ProcessedEntry { url: entry.url, id: entry.id, title: entry.title, subreddit: entry.subreddit.unwrap(), author: entry.author.unwrap(), - blob: vector.into_vec(), - timestamp: entry.created_utc.to_u64()? + embedding: vector.into_vec(), + timestamp: entry.created_utc.to_u64()?, + metadata }).await?; + IMAGES_PROCESSED_COUNTER.inc(); } anyhow::Result::Ok(()) } @@ -365,7 +491,7 @@ async fn main() -> Result<()> { _ => None }; - log::info!("working..."); + tracing::info!("working..."); let mut paths = vec![]; for file in fs::read_dir(&config.input)? { @@ -381,36 +507,26 @@ async fn main() -> Result<()> { let mut file_readers = JoinSet::new(); - match config.mode { - OperatingMode::Count | OperatingMode::Sample(_) => { - let semaphore = Arc::new(Semaphore::new(cpus)); + let readers = match config.mode { + OperatingMode::Count | OperatingMode::Sample(_) => cpus, + OperatingMode::FullRun => 1 + }; - for path in paths { - let semaphore = semaphore.clone(); - let permit = semaphore.acquire_owned().await?; - let entries_tx = entries_tx.clone(); - let path_ = path.clone(); - log::info!("reading {:?}", path); - file_readers.spawn_blocking(move || { - match process_file(path_, entries_tx, timestamp_threshold.map(|(x, _)| x)) { - Ok(_) => (), - Err(e) => log::error!("could not parse {:?} {:?}", &path, e) - } - std::mem::drop(permit); - }); + let semaphore = Arc::new(Semaphore::new(readers)); + + for path in paths { + let semaphore = semaphore.clone(); + let permit = semaphore.acquire_owned().await?; + let entries_tx = entries_tx.clone(); + let path_ = path.clone(); + tracing::info!("reading {:?}", path); + file_readers.spawn_blocking(move || { + match process_file(path_, entries_tx, timestamp_threshold.map(|(x, _)| x)) { + Ok(_) => (), + Err(e) => tracing::error!("could not parse {:?} {:?}", &path, e) } - }, - OperatingMode::FullRun => { - for path in paths { - let entries_tx = entries_tx.clone(); - let path_ = path.clone(); - log::info!("reading {:?}", path); - file_readers.spawn_blocking(move || match process_file(path_, entries_tx, timestamp_threshold.map(|(x, _)| x)) { - Ok(_) => (), - Err(e) => log::error!("could not parse {:?} {:?}", &path, e) - }); - } - } + std::mem::drop(permit); + }); } while let Some(x) = file_readers.try_join_next() { @@ -419,9 +535,9 @@ async fn main() -> Result<()> { std::mem::drop(entries_tx); println!("{:?}", load_task.await?); - if let Some(task) = resize_task { println!("{:?}", task.await?); } - if let Some(task) = embedding_generation_task { println!("{:?}", task.await?) }; - if let Some(task) = output_writer_task { println!("{:?}", task.await?) }; + if let Some(task) = resize_task { println!("resize: {:?}", task.await?); } + if let Some(task) = embedding_generation_task { println!("embedding: {:?}", task.await?) }; + if let Some(task) = output_writer_task { println!("output: {:?}", task.await?) }; Ok(()) -} \ No newline at end of file +}