From a11bc0b22d8b1e9f09da35968eac475d2a2f9db2 Mon Sep 17 00:00:00 2001 From: osmarks Date: Sun, 20 Oct 2024 07:22:42 +0100 Subject: [PATCH] update dependencies, better tracing, fix semantic chunking, fix bug --- .gitignore | 1 + Cargo.lock | 1335 +++++++++++++++++--------- Cargo.toml | 13 +- atsc_backend.py | 15 +- config.toml | 4 +- src/indexer.rs | 17 +- src/indexers/atuin.rs | 8 +- src/indexers/books.rs | 14 +- src/indexers/firefox_history_dump.rs | 18 +- src/indexers/mediafiles.rs | 58 +- src/indexers/minoteaur.rs | 113 ++- src/indexers/rclwe.rs | 106 +- src/indexers/textfiles.rs | 112 ++- src/indexers/thunderbird_email.rs | 75 +- src/main.rs | 66 +- src/semantic.rs | 81 +- src/util.rs | 5 +- 17 files changed, 1309 insertions(+), 732 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1de5659 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +target \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index c63a254..a40a765 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,18 +4,18 @@ version = 3 [[package]] name = "addr2line" -version = "0.22.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" @@ -55,9 +55,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "37bf3594c4c988a53154954629820791dde498571819ae4ca50ca811e060cc95" [[package]] name = "arbitrary" @@ -120,6 +120,28 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "async-task" version = "4.7.1" @@ -128,13 +150,13 @@ checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" [[package]] name = "async-trait" -version = "0.1.80" +version = "0.1.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -156,23 +178,70 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "axum" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "504e3947307ac8326a5437504c517c4b56716c9d98fac0028c2acc7ca47d70ae" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper 1.0.1", + "tower 0.5.1", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 1.0.1", + "tower-layer", + "tower-service", +] [[package]] name = "backtrace" -version = "0.3.73" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] @@ -250,24 +319,29 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" [[package]] name = "castaway" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" dependencies = [ "rustversion", ] [[package]] name = "cc" -version = "1.0.101" +version = "1.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac367972e516d45567c7eafc73d24e1c193dcf200a8d94e9db7b3d38b349572d" +checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" +dependencies = [ + "jobserver", + "libc", + "shlex", +] [[package]] name = "cfg-if" @@ -277,9 +351,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cfg_aliases" -version = "0.1.1" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" @@ -293,14 +367,14 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] name = "compact_str" -version = "0.8.0-beta" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a2dc81369dde6d31456eedbb4fd3d320f0b9713573dfe06e569e2bce7607f2" +checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644" dependencies = [ "castaway", "cfg-if", @@ -333,6 +407,45 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "console-api" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ed14aa9c9f927213c6e4f3ef75faaad3406134efe84ba2cb7983431d5f0931" +dependencies = [ + "futures-core", + "prost", + "prost-types", + "tonic", + "tracing-core", +] + +[[package]] +name = "console-subscriber" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e3a111a37f3333946ebf9da370ba5c5577b18eb342ec683eb488dd21980302" +dependencies = [ + "console-api", + "crossbeam-channel", + "crossbeam-utils", + "futures-task", + "hdrhistogram", + "humantime", + "hyper-util", + "prost", + "prost-types", + "serde", + "serde_json", + "thread_local", + "tokio", + "tokio-stream", + "tonic", + "tracing", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "convert_case" version = "0.4.0" @@ -351,15 +464,15 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" dependencies = [ "libc", ] @@ -373,6 +486,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -416,19 +538,19 @@ dependencies = [ [[package]] name = "ctrlc" -version = "3.4.4" +version = "3.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "672465ae37dc1bc6380a6547a8883d5dd397b0f1faaad4f265726cc7042a5345" +checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3" dependencies = [ "nix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "darling" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" dependencies = [ "darling_core", "darling_macro", @@ -436,27 +558,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] name = "darling_macro" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -501,38 +623,38 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] name = "derive_builder" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" dependencies = [ "derive_builder_macro", ] [[package]] name = "derive_builder_core" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] name = "derive_builder_macro" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -545,7 +667,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -588,7 +710,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -612,19 +734,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "env_logger" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" -dependencies = [ - "humantime", - "is-terminal", - "log", - "regex", - "termcolor", -] - [[package]] name = "epub" version = "2.1.2" @@ -704,15 +813,15 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fastrand" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "flate2" -version = "1.0.30" +version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" dependencies = [ "crc32fast", "miniz_oxide", @@ -750,9 +859,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" dependencies = [ "futures-channel", "futures-core", @@ -765,9 +874,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -775,15 +884,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ "futures-core", "futures-task", @@ -792,9 +901,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-lite" @@ -811,26 +920,26 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-timer" @@ -840,9 +949,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", @@ -890,15 +999,15 @@ dependencies = [ [[package]] name = "gimli" -version = "0.29.0" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "h2" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" dependencies = [ "atomic-waker", "bytes", @@ -906,7 +1015,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap", + "indexmap 2.6.0", "slab", "tokio", "tokio-util", @@ -923,6 +1032,12 @@ dependencies = [ "crunchy", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.14.5" @@ -932,13 +1047,32 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" + [[package]] name = "hashlink" version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", +] + +[[package]] +name = "hdrhistogram" +version = "7.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "base64 0.21.7", + "byteorder", + "flate2", + "nom", + "num-traits", ] [[package]] @@ -1007,9 +1141,9 @@ dependencies = [ [[package]] name = "http-body" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", "http", @@ -1030,9 +1164,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.9.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" [[package]] name = "httpdate" @@ -1048,9 +1182,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.3.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" dependencies = [ "bytes", "futures-channel", @@ -1059,6 +1193,7 @@ dependencies = [ "http", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "smallvec", @@ -1068,21 +1203,34 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.2" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", "http", "hyper", "hyper-util", - "rustls 0.23.10", + "rustls", "rustls-pki-types", "tokio", "tokio-rustls", "tower-service", ] +[[package]] +name = "hyper-timeout" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-tls" version = "0.6.0" @@ -1101,9 +1249,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.5" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56" +checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" dependencies = [ "bytes", "futures-channel", @@ -1114,16 +1262,15 @@ dependencies = [ "pin-project-lite", "socket2", "tokio", - "tower", "tower-service", "tracing", ] [[package]] name = "iana-time-zone" -version = "0.1.60" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1157,6 +1304,7 @@ dependencies = [ "async-walkdir", "chrono", "compact_str", + "console-subscriber", "deadpool-postgres", "derive_more", "epub", @@ -1165,15 +1313,14 @@ dependencies = [ "html5gum", "im", "lazy_static", - "log", "mail-parser", "maud", "ntex", "ntex-files", "num_cpus", + "pcre2", "percent-encoding", "pgvector", - "pretty_env_logger", "regex", "reqwest", "rmp-serde", @@ -1190,6 +1337,8 @@ dependencies = [ "tokio-postgres", "tokio-stream", "toml", + "tracing", + "tracing-subscriber", "ulid", ] @@ -1220,12 +1369,22 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.15.0", ] [[package]] @@ -1249,7 +1408,7 @@ checksum = "0122b7114117e64a63ac49f752a5ca4624d534c7b1c7de796ac196381cd2d947" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -1263,20 +1422,9 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.9.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" - -[[package]] -name = "is-terminal" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" -dependencies = [ - "hermit-abi 0.3.9", - "libc", - "windows-sys 0.52.0", -] +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" [[package]] name = "itertools" @@ -1296,6 +1444,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -1309,10 +1466,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47f142fe24a9c9944451e8349de0a56af5f3e7226dc46f3ed4d4ecc0b85af75e" [[package]] -name = "js-sys" -version = "0.3.69" +name = "jobserver" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -1331,9 +1497,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "libredox" @@ -1373,9 +1539,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "macro_rules_attribute" @@ -1395,13 +1561,28 @@ checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568" [[package]] name = "mail-parser" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed5a1335c3a964788c90cb42ae04a34b5f2628e89566949ce3bd4ada695c0bcd" +checksum = "93c3b9e5d8b17faf573330bbc43b37d6e918c0a3bf8a88e7d0a220ebc84af9fc" dependencies = [ "encoding_rs", ] +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "maud" version = "0.26.0" @@ -1421,7 +1602,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -1464,22 +1645,23 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ - "adler", + "adler2", ] [[package]] name = "mio" -version = "0.8.11" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ + "hermit-abi 0.3.9", "libc", "wasi", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -1500,7 +1682,7 @@ checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -1528,9 +1710,9 @@ dependencies = [ [[package]] name = "nix" -version = "0.28.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ "bitflags 2.6.0", "cfg-if", @@ -1550,12 +1732,13 @@ dependencies = [ [[package]] name = "ntex" -version = "2.0.3" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7e294048bb36df432ea5fb3bb236445fbcba8de413b535463c0634ee4cb96bf" +checksum = "223834e688405dcc46b5c28bc9225648c603e64d7b61e8903da33064b6f1464e" dependencies = [ "base64 0.22.1", "bitflags 2.6.0", + "bytes", "encoding_rs", "httparse", "httpdate", @@ -1630,9 +1813,9 @@ dependencies = [ [[package]] name = "ntex-h2" -version = "1.0.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f0b6bd1bdce912d5ccb4fb9e442f89c129dc07bdb5cad321606e976e989dc0" +checksum = "8e01b86bf30768ed7dca26bf279d0e0798ba5acf0baef4b0ea8e17a91ba71ad4" dependencies = [ "bitflags 2.6.0", "fxhash", @@ -1643,7 +1826,6 @@ dependencies = [ "ntex-http", "ntex-io", "ntex-net", - "ntex-rt", "ntex-service", "ntex-util", "pin-project-lite", @@ -1666,14 +1848,15 @@ dependencies = [ [[package]] name = "ntex-io" -version = "2.0.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72c60a60c517453a2ea7fd40483b87531bd8d6ce0e545f4643dabd1f646620a9" +checksum = "80c49628e35ff52f36137a8e732261f392de621406a163571888f6163e3f6b10" dependencies = [ "bitflags 2.6.0", "log", "ntex-bytes", "ntex-codec", + "ntex-rt", "ntex-service", "ntex-util", "pin-project-lite", @@ -1692,9 +1875,9 @@ dependencies = [ [[package]] name = "ntex-net" -version = "2.0.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44110aa49eddc9fdee2e673698fb6cc61bea3c1df1c8239bab86d465cafd2097" +checksum = "02f3d87616c8fc39c41d432402d98a118861e8e144df30037fe1400cdd29ac35" dependencies = [ "log", "ntex-bytes", @@ -1722,9 +1905,9 @@ dependencies = [ [[package]] name = "ntex-rt" -version = "0.4.13" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "471b8ce4b4edfd8eddcb4143ebd91022e949b96627e14436bf0beec66ecd6394" +checksum = "76f86c83f89053c29dcf5f1e9663c53726eea337a3221fa243e61e0410a40ad7" dependencies = [ "async-channel", "futures-core", @@ -1735,9 +1918,9 @@ dependencies = [ [[package]] name = "ntex-server" -version = "2.1.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e04e83107d59c667ab49d7fff5bcbf7b144ff778a41fd94f68c5aea5485069e" +checksum = "1b9c3f4b038d1bcc3aff4e457a4b8258828b8e119c9ef4fd1e42c8df5e732cee" dependencies = [ "async-broadcast", "async-channel", @@ -1756,18 +1939,18 @@ dependencies = [ [[package]] name = "ntex-service" -version = "3.0.0" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcebe6c7fa4d346646ef94c29957aab40078e91cd4c73ef3a1394f2a49de50d4" +checksum = "02daa9c4fc8b5382b24dd69d504599a72774d6828e4fc21e9013cb62096db7aa" dependencies = [ "slab", ] [[package]] name = "ntex-tls" -version = "2.0.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d996277357fbbf5bd73e1ec9e93a97e3192ee748f496f8dde88a578146647d4" +checksum = "6e08948d9a1d27d11c474c374e6b8c0eee7e2dd4a288967d5dcce13d7adbd80e" dependencies = [ "log", "ntex-bytes", @@ -1779,9 +1962,9 @@ dependencies = [ [[package]] name = "ntex-tokio" -version = "0.5.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5715ef31e5a599ab467846acd948ea597193fc08343338fdecbd1e5fe181af3d" +checksum = "623868ff022f737d7b94212dc85e471f895e58f6c59c72552cdc9a22c5f167ed" dependencies = [ "log", "ntex-bytes", @@ -1792,9 +1975,9 @@ dependencies = [ [[package]] name = "ntex-util" -version = "2.1.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "077bcb54200ad9e542553504d86615fc682dcfe7a5086b260109b921f14c7d7f" +checksum = "b95f0cf57859407e61c61a7e131cd339b05537046580b65abbf0a817f46917be" dependencies = [ "bitflags 2.6.0", "futures-core", @@ -1808,6 +1991,16 @@ dependencies = [ "slab", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1829,23 +2022,23 @@ dependencies = [ [[package]] name = "num_enum" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02339744ee7253741199f897151b38e72257d13802d4ee837285cc2990a90845" +checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179" dependencies = [ "num_enum_derive", ] [[package]] name = "num_enum_derive" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "681030a937600a36906c185595136d26abfebb4aa9c65701cefcaf8578bb982b" +checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -1856,18 +2049,18 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" -version = "0.36.0" +version = "0.36.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "oneshot" @@ -1899,9 +2092,9 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.64" +version = "0.10.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" dependencies = [ "bitflags 2.6.0", "cfg-if", @@ -1920,7 +2113,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -1931,9 +2124,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.102" +version = "0.9.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" dependencies = [ "cc", "libc", @@ -1948,10 +2141,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] -name = "parking" -version = "2.2.0" +name = "overload" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" [[package]] name = "parking_lot" @@ -1971,9 +2170,9 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.2", + "redox_syscall", "smallvec", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -1982,6 +2181,27 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pcre2" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be55c43ac18044541d58d897e8f4c55157218428953ebd39d86df3ba0286b2b" +dependencies = [ + "libc", + "log", + "pcre2-sys", +] + +[[package]] +name = "pcre2-sys" +version = "0.2.9" +source = "git+https://github.com/osmarks/rust-pcre2/?rev=ec7d5cf#ec7d5cfdd0f93a4a3bbe3cb180af45a42abb3c60" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1990,9 +2210,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pgvector" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b635319f45b3c75207dbb51839b56056816c98a13fd1258e4a859ad540307625" +checksum = "6ed92bf218dbe236609222dca0345767408ee7d5c93876c7fe09fa9b03f7249f" dependencies = [ "bytes", "half", @@ -2019,22 +2239,22 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.5" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +checksum = "baf123a161dde1e524adf36f90bc5d8d3462824a9c43553ad07a8183161189ec" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.5" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +checksum = "a4502d8515ca9f32f1fb543d987f63d95a14934883db45bdb48060b6b69257f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -2051,9 +2271,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "piper" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1d5c74c9876f070d3e8fd503d748c7d974c3e48da8f41350fa5222ef9b4391" +checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" dependencies = [ "atomic-waker", "fastrand", @@ -2062,15 +2282,15 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "polling" -version = "3.7.2" +version = "3.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3ed00ed3fbf728b5816498ecd316d1716eecaced9c0c8d2c5a6740ca214985b" +checksum = "cc2790cd301dec6cd3b7a025e4815cf825724a51c98dccfe6a3e55f05ffb6511" dependencies = [ "cfg-if", "concurrent-queue", @@ -2078,22 +2298,22 @@ dependencies = [ "pin-project-lite", "rustix", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "portable-atomic" -version = "1.6.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" [[package]] name = "postgres-protocol" -version = "0.6.6" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b6c5ef183cd3ab4ba005f1ca64c21e8bd97ce4699cfea9e8d9a2c4958ca520" +checksum = "acda0ebdebc28befa84bee35e651e4c5f09073d668c7aed4cf7e23c3cda84b23" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "byteorder", "bytes", "fallible-iterator 0.2.0", @@ -2107,9 +2327,9 @@ dependencies = [ [[package]] name = "postgres-types" -version = "0.2.6" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d2234cdee9408b523530a9b6d2d6b373d1db34f6a8e51dc03ded1828d7fb67c" +checksum = "f66ea23a2d0e5734297357705193335e0a957696f34bed2f2faefacb2fec336f" dependencies = [ "array-init", "bytes", @@ -2122,27 +2342,20 @@ dependencies = [ [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "pretty_env_logger" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ - "env_logger", - "log", + "zerocopy", ] [[package]] name = "proc-macro-crate" -version = "3.1.0" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" dependencies = [ - "toml_edit 0.21.1", + "toml_edit", ] [[package]] @@ -2170,18 +2383,50 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" dependencies = [ "unicode-ident", ] [[package]] -name = "quote" -version = "1.0.36" +name = "prost" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.79", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -2258,27 +2503,18 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.4.1" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "redox_users" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom", "libredox", @@ -2287,38 +2523,53 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.5" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.8", + "regex-syntax 0.8.5", ] [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.5", ] [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" dependencies = [ "base64 0.22.1", "bytes", @@ -2345,7 +2596,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.1", "system-configuration", "tokio", "tokio-native-tls", @@ -2354,7 +2605,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "winreg", + "windows-registry", ] [[package]] @@ -2396,9 +2647,13 @@ dependencies = [ [[package]] name = "rs-abbreviation-number" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd44f7c109b548c61eb352d71a96d5026fd14e4c4a542be8fe0b202111229512" +checksum = "6ffbe68a09a59740bcd2d73972601b97c51d82a5ec83cafdbf6ff6883925e017" +dependencies = [ + "time_macro", + "time_macro_derive", +] [[package]] name = "rusqlite" @@ -2422,18 +2677,18 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ "bitflags 2.6.0", "errno", @@ -2444,25 +2699,13 @@ dependencies = [ [[package]] name = "rustls" -version = "0.22.4" +version = "0.23.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" +checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" dependencies = [ "log", - "ring", - "rustls-pki-types", - "rustls-webpki", - "subtle", - "zeroize", -] - -[[package]] -name = "rustls" -version = "0.23.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05cff451f60db80f490f3c182b77c35260baace73209e9cdbbe526bfe3a4d402" -dependencies = [ "once_cell", + "ring", "rustls-pki-types", "rustls-webpki", "subtle", @@ -2471,25 +2714,24 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" dependencies = [ - "base64 0.22.1", "rustls-pki-types", ] [[package]] name = "rustls-pki-types" -version = "1.7.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" [[package]] name = "rustls-webpki" -version = "0.102.4" +version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ "ring", "rustls-pki-types", @@ -2498,9 +2740,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "ryu" @@ -2510,11 +2752,11 @@ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "schannel" -version = "0.1.23" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2535,14 +2777,15 @@ dependencies = [ [[package]] name = "sea-query-derive" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a82fcb49253abcb45cdcb2adf92956060ec0928635eb21b4f7a6d8f25ab0bc" +checksum = "9834af2c4bd8c5162f00c89f1701fb6886119a88062cf76fe842ea9e232b9839" dependencies = [ + "darling", "heck", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", "thiserror", ] @@ -2565,9 +2808,9 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "security-framework" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ "bitflags 2.6.0", "core-foundation", @@ -2578,9 +2821,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" dependencies = [ "core-foundation-sys", "libc", @@ -2594,9 +2837,9 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.203" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] @@ -2612,31 +2855,32 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.203" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] name = "serde_json" -version = "1.0.118" +version = "1.0.131" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" +checksum = "67d42a0bd4ac281beff598909bb56a86acaf979b84483e1c79c10dcaf98f8cf3" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] [[package]] name = "serde_spanned" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" dependencies = [ "serde", ] @@ -2675,6 +2919,21 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook" version = "0.3.17" @@ -2795,37 +3054,46 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.68" +version = "2.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + [[package]] name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "system-configuration" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", "core-foundation", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -2833,50 +3101,68 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.10.1" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" dependencies = [ "cfg-if", "fastrand", + "once_cell", "rustix", - "windows-sys 0.52.0", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", + "windows-sys 0.59.0", ] [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "time_macro" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65a819dbcae573521af2b2a9bd345b9443619ecfc9b70647ff25d87279dc3b73" + +[[package]] +name = "time_macro_derive" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff7c73ff78b7be9d81624666e675b7ced5e29e620dff9fbd486027872a3feb5" +dependencies = [ + "quote", + "syn 2.0.79", ] [[package]] name = "tinyvec" -version = "1.6.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c55115c6fbe2d2bef26eb09ad74bde02d8255476fc0c7b515ef09fbb35742d82" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ "tinyvec_macros", ] @@ -2910,7 +3196,7 @@ dependencies = [ "rayon", "rayon-cond", "regex", - "regex-syntax", + "regex-syntax 0.8.5", "serde", "serde_json", "spm_precompiled", @@ -2922,32 +3208,32 @@ dependencies = [ [[package]] name = "tokio" -version = "1.38.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "tracing", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -2962,9 +3248,9 @@ dependencies = [ [[package]] name = "tokio-postgres" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d340244b32d920260ae7448cb72b6e238bddc3d4f7603394e7dd46ed8e48f5b8" +checksum = "3b5d3742945bc7d7f210693b0c58ae542c6fd47b17adbbda0885f3dcb34a6bdb" dependencies = [ "async-trait", "byteorder", @@ -2992,16 +3278,16 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.10", + "rustls", "rustls-pki-types", "tokio", ] [[package]] name = "tokio-stream" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" dependencies = [ "futures-core", "pin-project-lite", @@ -3010,9 +3296,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", @@ -3023,47 +3309,66 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.14" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f49eb2ab21d2f26bd6db7bf383edc527a7ebaee412d17af4d40fdccd442f335" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.22.14", + "toml_edit", ] [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.21.1" +version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ - "indexmap", - "toml_datetime", - "winnow 0.5.40", -] - -[[package]] -name = "toml_edit" -version = "0.22.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f21c7aaf97f1bd9ca9d4f9e73b0a6c74bd5afef56f2bc931943a6e1c37e04e38" -dependencies = [ - "indexmap", + "indexmap 2.6.0", "serde", "serde_spanned", "toml_datetime", - "winnow 0.6.13", + "winnow", +] + +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.22.1", + "bytes", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "socket2", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", ] [[package]] @@ -3074,24 +3379,43 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", + "indexmap 1.9.3", "pin-project", "pin-project-lite", + "rand", + "slab", "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 0.1.2", "tower-layer", "tower-service", ] [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -3112,7 +3436,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -3122,6 +3446,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -3138,9 +3492,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "ulid" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34778c17965aa2a08913b57e1f34db9b4a63f5de31768b55bf20d2795f921259" +checksum = "04f903f293d11f31c0c29e4148f6dc0d033a7f80cebc0282bea147611667d289" dependencies = [ "getrandom", "rand", @@ -3150,30 +3504,27 @@ dependencies = [ [[package]] name = "unicase" -version = "2.7.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] +checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" [[package]] name = "unicode-bidi" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" +checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-normalization" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" dependencies = [ "tinyvec", ] @@ -3189,21 +3540,21 @@ dependencies = [ [[package]] name = "unicode-properties" -version = "0.1.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291" +checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" [[package]] name = "unicode-segmentation" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.13" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode_categories" @@ -3219,18 +3570,17 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "2.9.7" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd" +checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a" dependencies = [ "base64 0.22.1", "flate2", "log", "native-tls", "once_cell", - "rustls 0.22.4", + "rustls", "rustls-pki-types", - "rustls-webpki", "serde", "serde_json", "url", @@ -3254,6 +3604,12 @@ version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e8257fbc510f0a46eb602c10215901938b5c2a7d5e70fc11483b1d3c9b5b18c" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" @@ -3262,9 +3618,9 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "want" @@ -3289,34 +3645,35 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ "cfg-if", "js-sys", @@ -3326,9 +3683,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3336,28 +3693,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", @@ -3375,40 +3732,83 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.3" +version = "0.26.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd7c23921eeb1713a4e851530e9b9756e4fb0e89978582942612524cf09f01cd" +checksum = "841c67bff177718f1d4dfefde8d8f0e78f9b6589319ba88312f567fc5841a958" dependencies = [ "rustls-pki-types", ] [[package]] name = "whoami" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9" +checksum = "372d5b87f58ec45c384ba03563b03544dc5fadc3983e434b286913f5b4a9bb6d" dependencies = [ - "redox_syscall 0.4.1", + "redox_syscall", "wasite", "web-sys", ] [[package]] -name = "winapi-util" -version = "0.1.8" +name = "winapi" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ - "windows-sys 0.52.0", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", ] +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", ] [[package]] @@ -3426,7 +3826,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -3446,18 +3855,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -3468,9 +3877,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -3480,9 +3889,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -3492,15 +3901,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -3510,9 +3919,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -3522,9 +3931,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -3534,9 +3943,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -3546,62 +3955,44 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.5.40" +version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" dependencies = [ "memchr", ] -[[package]] -name = "winnow" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1" -dependencies = [ - "memchr", -] - -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "xml-rs" -version = "0.8.20" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" +checksum = "af4e2e2f7cba5a093896c1e150fbfe177d1883e7448200efb81d40b9d339ef26" [[package]] name = "zerocopy" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ + "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.68", + "syn 2.0.79", ] [[package]] @@ -3621,7 +4012,7 @@ dependencies = [ "crossbeam-utils", "displaydoc", "flate2", - "indexmap", + "indexmap 2.6.0", "num_enum", "thiserror", ] diff --git a/Cargo.toml b/Cargo.toml index 8098f69..23ee55b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" [dependencies] tokio-postgres = { version = "0.7", features = ["with-chrono-0_4", "with-serde_json-1"] } -tokio = { version = "1", features = ["full"]} +tokio = { version = "1", features = ["full", "tracing"]} chrono = { version = "0.4", features = ["serde"] } anyhow = "1" async-walkdir = "2" @@ -16,8 +16,6 @@ toml = "0.8" serde = { version = "1", features = ["derive"] } reqwest = "0.12" deadpool-postgres = "0.14" -log = "0.4" -pretty_env_logger = "0.5" pgvector = { version = "0.3", features = ["postgres", "halfvec"] } tokenizers = { version = "0.19", features = ["http"] } regex = "1" @@ -42,4 +40,11 @@ im = { version = "15", features = ["serde"] } sea-query = { version = "0.30", features = ["backend-postgres", "postgres-array"] } sea-query-postgres = { version = "0.4", features = ["postgres-array"] } ulid = { version = "1", features = ["serde"] } -mail-parser = "0.9" \ No newline at end of file +mail-parser = "0.9" +pcre2 = "0.2" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +console-subscriber = "0.4" + +[patch.crates-io] +pcre2-sys = { git = "https://github.com/osmarks/rust-pcre2/", rev = "ec7d5cf" } diff --git a/atsc_backend.py b/atsc_backend.py index d14d6b6..47e728d 100644 --- a/atsc_backend.py +++ b/atsc_backend.py @@ -16,10 +16,9 @@ print("Models loaded.") BASE = "/media/" conn = psycopg2.connect("dbname=maghammer user=maghammer") -conn2 = psycopg2.connect("dbname=maghammer user=maghammer") -csr = conn.cursor() -csr2 = conn.cursor() -csr.execute("SELECT id, path FROM media_files WHERE auto_subs_state = 1") # PENDING +with conn.cursor() as csr: + csr.execute("SELECT id, path FROM media_files WHERE auto_subs_state = 1") # PENDING + rows = csr.fetchall() def format_duration(seconds): hours = int(seconds / 3600.0) @@ -29,7 +28,8 @@ def format_duration(seconds): full_seconds = int(seconds) return f"{hours:02}:{minutes:02}:{full_seconds:02}" -while row := csr.fetchone(): +print(f"Processing {len(rows)} files...") +for row in rows: file = row[1] docid = row[0] start = time.time() @@ -55,5 +55,6 @@ while row := csr.fetchone(): subs += f"[{format_duration(seg['start'])} -> {format_duration(seg['end'])}]: {seg['text'].strip()}\n" subs = subs.strip() - csr2.execute("UPDATE media_files SET subs = %s, auto_subs_state = 2 WHERE id = %s", (subs, docid)) # GENERATED - conn2.commit() \ No newline at end of file + with conn.cursor() as csr: + csr.execute("UPDATE media_files SET subs = %s, auto_subs_state = 2 WHERE id = %s", (subs, docid)) # GENERATED + conn.commit() \ No newline at end of file diff --git a/config.toml b/config.toml index 1cca42d..44d1dd4 100644 --- a/config.toml +++ b/config.toml @@ -5,7 +5,7 @@ concurrency = 8 backend = "http://100.64.0.10:1706" embedding_dim = 1024 tokenizer = "Snowflake/snowflake-arctic-embed-l" -max_tokens = 510 +max_tokens = 128 batch_size = 256 [indexers.text_files] @@ -54,4 +54,4 @@ output = "rclwe_dump2" [indexers.browser_history] db_path = "/data/archive/lthist.sqlite3" -include_untimestamped_records = false \ No newline at end of file +include_untimestamped_records = false diff --git a/src/indexer.rs b/src/indexer.rs index 8b0e31b..48520bf 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -1,5 +1,5 @@ use std::{collections::HashSet, sync::Arc}; -use anyhow::{Context, Result}; +use anyhow::Result; use compact_str::CompactString; use deadpool_postgres::Pool; use futures::{pin_mut, TryStreamExt}; @@ -62,14 +62,25 @@ pub trait Indexer: Sync + Send { pub async fn delete_nonexistent_files(ctx: Arc, select_paths: &str, delete_by_id: &str, existing: &HashSet) -> Result<()> { let conn = ctx.pool.get().await?; + let mut conn2 = ctx.pool.get().await?; + let tx = conn2.transaction().await?; let it = conn.query_raw(select_paths, [""; 0]).await?; pin_mut!(it); while let Some(row) = it.try_next().await? { let path: String = row.get(0); let path = CompactString::from(path); if !existing.contains(&path) { - conn.execute(delete_by_id, &[&hash_str(&path)]).await?; + tx.execute(delete_by_id, &[&hash_str(&path)]).await?; } } + tx.commit().await?; Ok(()) -} \ No newline at end of file +} + +impl std::fmt::Debug for dyn Indexer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Indexer") + .field("name", &self.name()) + .finish() + } +} diff --git a/src/indexers/atuin.rs b/src/indexers/atuin.rs index 941648d..102d24d 100644 --- a/src/indexers/atuin.rs +++ b/src/indexers/atuin.rs @@ -6,6 +6,7 @@ use crate::{indexer::{ColumnSpec, Ctx, Indexer, TableSpec}, util::hash_str}; use chrono::prelude::*; use tokio_postgres::{binary_copy::BinaryCopyInWriter, types::Type}; use rusqlite::OpenFlags; +use tracing::instrument; #[derive(Serialize, Deserialize)] struct Config { @@ -47,7 +48,7 @@ CREATE TABLE shell_history ( ColumnSpec { name: "command", fts: true, - fts_short: false, + fts_short: true, trigram: false, is_array: false }, @@ -68,7 +69,7 @@ CREATE TABLE shell_history ( let max_timestamp = conn.query_one("SELECT max(timestamp) FROM shell_history", &[]).await?.get::<_, Option>>(0); let max_timestamp = max_timestamp.map(|x| x.timestamp_nanos_opt().unwrap() + 999).unwrap_or(0); // my code had better not be deployed unchanged in 2262; we have to add 1000 because of Postgres timestamp rounding let bg = tokio::task::spawn_blocking(move || self.read_database(tx, max_timestamp)); - + let sink = conn.copy_in("COPY shell_history (id, timestamp, command, cwd, duration, exit, hostname, session) FROM STDIN BINARY").await?; let writer = BinaryCopyInWriter::new(sink, &[Type::INT8, Type::TIMESTAMPTZ, Type::TEXT, Type::TEXT, Type::INT8, Type::INT4, Type::TEXT, Type::TEXT]); @@ -103,6 +104,7 @@ impl AtuinIndexer { })) } + #[instrument(skip(self, target))] fn read_database(&self, target: tokio::sync::mpsc::Sender<(String, i64, i64, i32, String, String, String, String)>, min_timestamp: i64) -> Result<()> { let conn = rusqlite::Connection::open_with_flags(&self.config.db_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?; let mut fetch_stmt = conn.prepare("SELECT id, timestamp, duration, exit, command, cwd, session, hostname FROM history WHERE timestamp > ? ORDER BY timestamp ASC")?; @@ -122,4 +124,4 @@ impl AtuinIndexer { } Ok(()) } -} \ No newline at end of file +} diff --git a/src/indexers/books.rs b/src/indexers/books.rs index 2e8afbf..2bed191 100644 --- a/src/indexers/books.rs +++ b/src/indexers/books.rs @@ -13,6 +13,7 @@ use crate::util::{hash_str, parse_html, parse_date, systemtime_to_utc, CONFIG}; use crate::indexer::{delete_nonexistent_files, Ctx, Indexer, TableSpec, ColumnSpec}; use chrono::prelude::*; use std::str::FromStr; +use tracing::instrument; #[derive(Serialize, Deserialize, Clone)] struct Config { @@ -35,6 +36,7 @@ pub struct EpubParse { tags: Vec } +#[instrument] pub fn parse_epub(path: &PathBuf) -> Result { let mut epub = EpubDoc::new(path).context("initial doc load")?; @@ -58,6 +60,8 @@ pub fn parse_epub(path: &PathBuf) -> Result { tags: epub.metadata.get("subject").cloned().unwrap_or_else(Vec::new) }; + tracing::trace!("read epub {:?}", path); + let mut seen = HashSet::new(); for navpoint in epub.toc.clone() { let path = navpoint.content; @@ -71,16 +75,18 @@ pub fn parse_epub(path: &PathBuf) -> Result { seen.insert(last.to_string()); let resource = epub.get_resource_str_by_path(last).with_context(|| format!("resource {} nonexistent", last))?; let html = parse_html(resource.as_bytes(), true); + tracing::trace!("read epub chapter {:?}", navpoint.label); parse.chapters.push(( html.0, navpoint.label.clone() - )) + )); } } Ok(parse) } +#[instrument(skip(ctx, existing_files))] async fn handle_epub(relpath: CompactString, ctx: Arc, entry: DirEntry, existing_files: Arc>>) -> Result<()> { let epub = entry.path(); @@ -95,7 +101,7 @@ async fn handle_epub(relpath: CompactString, ctx: Arc, entry: DirEntry, exi let parse_result = match tokio::task::spawn_blocking(move || parse_epub(&epub)).await? { Ok(x) => x, Err(e) => { - log::warn!("Failed parse for {}: {}", relpath, e); + tracing::warn!("Failed parse for {}: {}", relpath, e); return Ok(()) } }; @@ -229,7 +235,7 @@ CREATE TABLE chapters ( let entries = WalkDir::new(&self.config.path); let base_path = &self.config.path; - entries.map_err(|e| anyhow::Error::from(e)).try_for_each_concurrent(Some(CONFIG.concurrency), |entry| + entries.map_err(|e| anyhow::Error::from(e)).try_for_each_concurrent(Some(CONFIG.concurrency), |entry| { let ctx = ctx.clone(); let existing_files = existing_files.clone(); @@ -279,4 +285,4 @@ impl BooksIndexer { config })) } -} \ No newline at end of file +} diff --git a/src/indexers/firefox_history_dump.rs b/src/indexers/firefox_history_dump.rs index db08a95..6814ada 100644 --- a/src/indexers/firefox_history_dump.rs +++ b/src/indexers/firefox_history_dump.rs @@ -5,8 +5,9 @@ use serde::{Deserialize, Serialize}; use crate::{indexer::{ColumnSpec, Ctx, Indexer, TableSpec}, util::hash_str}; use chrono::prelude::*; use tokio_postgres::{binary_copy::BinaryCopyInWriter, types::Type}; -use rusqlite::OpenFlags; +use rusqlite::{types::ValueRef, OpenFlags}; use tokio::sync::mpsc::Sender; +use tracing::instrument; #[derive(Serialize, Deserialize)] struct Config { @@ -63,14 +64,14 @@ CREATE TABLE history ( ColumnSpec { name: "title", fts: true, - fts_short: false, + fts_short: true, trigram: true, is_array: false }, ColumnSpec { name: "description", fts: true, - fts_short: false, + fts_short: true, trigram: false, is_array: false } @@ -162,12 +163,19 @@ impl FirefoxHistoryDumpIndexer { })) } + #[instrument(skip(places, bookmarks, history, self))] fn read_database(&self, places: Sender<(String, String, Option, i64, Option, Option, Option)>, places_min_ts: i64, bookmarks: Sender<(String, String, String, i64, i64)>, bookmarks_min_ts: i64, history: Sender<(String, String, i64, i32)>, history_min_ts: i64) -> Result<()> { let conn = rusqlite::Connection::open_with_flags(&self.config.db_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?; // Apparently some records in my history database have no last_visit_date. I don't know what happened to it or why, but this is not fixable, so add an ugly hack for them. let mut fetch_places = conn.prepare(if self.config.include_untimestamped_records { "SELECT guid, url, title, visit_count, last_visit_date, description, preview_image_url FROM places WHERE last_visit_date > ? OR last_visit_date IS NULL ORDER BY last_visit_date ASC" } else { "SELECT guid, url, title, visit_count, last_visit_date, description, preview_image_url FROM places WHERE last_visit_date > ? OR last_visit_date IS NULL ORDER BY last_visit_date ASC" })?; for row in fetch_places.query_map([places_min_ts], |row| { - Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?, row.get(4)?, row.get(5)?, row.get(6)?)) + let description = row.get_ref(5)?; + let description = match description { + ValueRef::Null => None, + ValueRef::Text(x) => Some(String::from_utf8_lossy(x).to_string()), + _ => panic!("unexpected type") + }; + Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?, row.get(4)?, description, row.get(6)?)) })? { let row = row?; places.blocking_send(row)?; @@ -193,4 +201,4 @@ impl FirefoxHistoryDumpIndexer { std::mem::drop(history); Ok(()) } -} \ No newline at end of file +} diff --git a/src/indexers/mediafiles.rs b/src/indexers/mediafiles.rs index ebef6de..9f01f4e 100644 --- a/src/indexers/mediafiles.rs +++ b/src/indexers/mediafiles.rs @@ -11,9 +11,10 @@ use tokio::process::Command; use tokio::sync::RwLock; use crate::util::{hash_str, parse_date, parse_html, systemtime_to_utc, urlencode, CONFIG}; use crate::indexer::{Ctx, Indexer, TableSpec, delete_nonexistent_files, ColumnSpec}; -use async_walkdir::WalkDir; +use async_walkdir::{Filtering, WalkDir}; use chrono::prelude::*; use regex::{RegexSet, Regex}; +use tracing::instrument; #[derive(Serialize, Deserialize, Debug)] struct Config { @@ -89,6 +90,7 @@ struct Stream { #[derive(Deserialize, Debug)] struct Format { duration: String, + #[serde(default)] tags: HashMap } @@ -106,10 +108,10 @@ lazy_static::lazy_static! { fn parse_duration(s: &str) -> Result { let mat = DURATION_STRING.captures(s).context("duration misformatted")?; - let duration = f32::from_str(mat.get(1).unwrap().as_str())? * 3600.0 + let duration = f32::from_str(mat.get(1).unwrap().as_str())? * 3600.0 + f32::from_str(mat.get(2).unwrap().as_str())? * 60.0 + f32::from_str(&mat.get(3).unwrap().as_str().replace(",", "."))?; - + Ok(duration) } @@ -129,6 +131,7 @@ enum SRTParseState { ExpectData } +#[instrument(skip(srt))] fn parse_srt(srt: &str) -> Result { use SRTParseState::*; @@ -176,7 +179,7 @@ fn parse_filename(path: &PathBuf) -> Option<(String, String, Option, Option let ep = i32::from_str(mat.get(2).unwrap().as_str()).unwrap(); Some((dirname.to_string(), name.to_string(), Some(se), Some(ep))) } else { - Some((dirname.to_string(), stem.to_string(), None, None)) + Some((dirname.to_string(), stem.to_string(), None, None)) } } @@ -208,7 +211,7 @@ mod test { let mut s = String::new(); format_duration(parse_duration("00:01:02,410").unwrap(), &mut s); - assert_eq!(s, "00:01:02.410"); + assert_eq!(s, "00:01:02"); } } @@ -253,7 +256,9 @@ fn score_subtitle_stream(stream: &Stream, others: &Vec, parse_so_far: &M 1 } +#[instrument] async fn parse_media(path: &PathBuf, ignore: Arc) -> Result { + tracing::trace!("starting ffprobe"); let ffmpeg = Command::new("ffprobe") .arg("-hide_banner") .arg("-print_format").arg("json") @@ -266,6 +271,7 @@ async fn parse_media(path: &PathBuf, ignore: Arc) -> Result) -> Result i8::MIN { + tracing::trace!("reading subtitle track {:?}", best_subtitle_track); + let ffmpeg = Command::new("ffmpeg") .arg("-hide_banner").arg("-v").arg("quiet") .arg("-i").arg(path) @@ -463,13 +471,30 @@ CREATE TABLE media_files ( async fn run(&self, ctx: Arc) -> Result<()> { let entries = WalkDir::new(&self.config.path); - let ignore = &self.ignore_files; - let base_path = &self.config.path; + let ignore = Arc::new(self.ignore_files.clone()); + let base_path = Arc::new(self.config.path.clone()); + let base_path_ = base_path.clone(); let ignore_metadata = self.ignore_metadata.clone(); let existing_files = Arc::new(RwLock::new(HashSet::new())); + let existing_files_ = existing_files.clone(); + let ctx_ = ctx.clone(); entries + .filter(move |entry| { + let ignore = ignore.clone(); + let base_path = base_path.clone(); + let path = entry.path(); + tracing::trace!("filtering {:?}", path); + if let Some(path) = path.strip_prefix(&*base_path).ok().and_then(|x| x.to_str()) { + if ignore.is_match(path) { + return std::future::ready(Filtering::IgnoreDir); + } + } else { + return std::future::ready(Filtering::IgnoreDir); + } + std::future::ready(Filtering::Continue) + }) .map_err(|e| anyhow::Error::from(e)) .filter(|r| { // ignore permissions errors because things apparently break otherwise @@ -479,18 +504,20 @@ CREATE TABLE media_files ( }; async move { keep } }) - .try_for_each_concurrent(Some(CONFIG.concurrency), |entry| { - let ctx = ctx.clone(); - let existing_files = existing_files.clone(); + .try_for_each_concurrent(Some(CONFIG.concurrency), move |entry| { + tracing::trace!("got file {:?}", entry.path()); + let ctx = ctx_.clone(); + let base_path = base_path_.clone(); + let existing_files = existing_files_.clone(); let ignore_metadata = ignore_metadata.clone(); async move { let real_path = entry.path(); - let path = if let Some(path) = real_path.strip_prefix(base_path)?.to_str() { + let path = if let Some(path) = real_path.strip_prefix(&*base_path)?.to_str() { path } else { return Result::Ok(()); }; - if ignore.is_match(path) || !entry.file_type().await?.is_file() { + if !entry.file_type().await?.is_file() { return Ok(()); } let mut conn = ctx.pool.get().await?; @@ -499,6 +526,7 @@ CREATE TABLE media_files ( let row = conn.query_opt("SELECT timestamp FROM media_files WHERE id = $1", &[&hash_str(path)]).await?; let timestamp: DateTime = row.map(|r| r.get(0)).unwrap_or(DateTime::::MIN_UTC); let modtime = systemtime_to_utc(metadata.modified()?)?; + tracing::trace!("timestamp {:?}", timestamp); if modtime > timestamp { match parse_media(&real_path, ignore_metadata).await { Ok(x) => { @@ -526,9 +554,11 @@ CREATE TABLE media_files ( tx.commit().await?; }, Err(e) => { - log::warn!("Media parse {}: {:?}", &path, e) + tracing::warn!("Media parse {}: {:?}", &path, e) } } + } else { + tracing::trace!("skipping {:?}", path); } Result::Ok(()) } @@ -556,4 +586,4 @@ impl MediaFilesIndexer { config })) } -} \ No newline at end of file +} diff --git a/src/indexers/minoteaur.rs b/src/indexers/minoteaur.rs index 9158c0f..5d463ca 100644 --- a/src/indexers/minoteaur.rs +++ b/src/indexers/minoteaur.rs @@ -6,6 +6,7 @@ use crate::util::hash_str; use crate::indexer::{Ctx, Indexer, TableSpec, ColumnSpec}; use chrono::prelude::*; use rusqlite::OpenFlags; +use tracing::instrument; #[derive(Serialize, Deserialize)] struct Config { @@ -255,59 +256,9 @@ CREATE TABLE IF NOT EXISTS mino_files ( let last_view_timestamp: DateTime = row.get(2); timestamps.insert(ulid::Ulid::from_string(&ulid)?, (updated, last_view_timestamp)); } - + while let Some((id, object)) = rx.recv().await { - match object { - minoteaur_types::Object::Page(page) => { - // If we already have the latest information on this page, skip it. - if let Some((updated_timestamp, _last_view_timestamp)) = timestamps.get(&id) { - if *updated_timestamp >= page.updated { - continue; - } - } - let ulid = id.to_string(); - let int_id = hash_str(&ulid); - let tx = conn.transaction().await?; - tx.execute("DELETE FROM mino_pages WHERE id = $1", &[&int_id]).await?; - tx.execute("INSERT INTO mino_pages VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, 0, 0, $10)", - &[&int_id, &ulid, &page.updated, &page.created, &page.title, &(page.size.words as i32), &page.tags.into_iter().collect::>(), &page.names.into_iter().collect::>(), &page.content, &page.created]) - .await?; - for (key, value) in page.structured_data { - let (num, text) = match value { - minoteaur_types::Value::Number(x) => (Some(x), None), - minoteaur_types::Value::Text(t) => (None, Some(t)) - }; - tx.execute("INSERT INTO mino_structured_data (page, key, numeric_value, text_value) VALUES ($1, $2, $3, $4)", &[&int_id, &key, &num, &text]).await?; - } - for (_key, file) in page.files { - tx.execute("INSERT INTO mino_files (page, filename, size, timestamp, metadata) VALUES ($1, $2, $3, $4, $5)", &[&int_id, &file.filename, &(file.size as i32), &file.created, &tokio_postgres::types::Json(file.metadata)]).await?; - } - tx.commit().await?; - }, - // These should only occur after the page's record, with the exception of page creation. - minoteaur_types::Object::PageView(view) => { - if let Some((_updated_timestamp, last_view_timestamp)) = timestamps.get(&view.page) { - if *last_view_timestamp >= view.time { - continue; - } - } - let int_id = hash_str(&view.page.to_string()); - conn.execute("UPDATE mino_pages SET view_count = view_count + 1, last_view_timestamp = $2 WHERE id = $1", &[&int_id, &view.time]).await?; - }, - minoteaur_types::Object::Revision(rev) => { - // There's no separate "last revision timestamp" because revisions should always be associated with the updated field being adjusted. - if let Some((updated_timestamp, _last_view_timestamp)) = timestamps.get(&rev.page) { - if *updated_timestamp >= rev.time { - continue; - } - } - if let minoteaur_types::RevisionType::PageCreated = rev.ty { - continue; - } - let int_id = hash_str(&rev.page.to_string()); - conn.execute("UPDATE mino_pages SET revision_count = revision_count + 1 WHERE id = $1", &[&int_id]).await?; - } - } + MinoteaurIndexer::write_object(&mut conn, id, object, ×tamps).await?; } // Minoteaur doesn't have a delete button so not supporting deletes is clearly fine, probably. @@ -345,4 +296,60 @@ impl MinoteaurIndexer { } Ok(()) } -} \ No newline at end of file + + #[instrument(skip(conn, timestamps))] + async fn write_object(conn: &mut tokio_postgres::Client, id: ulid::Ulid, object: minoteaur_types::Object, timestamps: &HashMap, DateTime)>) -> Result<()> { + match object { + minoteaur_types::Object::Page(page) => { + // If we already have the latest information on this page, skip it. + if let Some((updated_timestamp, _last_view_timestamp)) = timestamps.get(&id) { + if *updated_timestamp >= page.updated { + return Ok(()); + } + } + let ulid = id.to_string(); + let int_id = hash_str(&ulid); + let tx = conn.transaction().await?; + tx.execute("DELETE FROM mino_pages WHERE id = $1", &[&int_id]).await?; + tx.execute("INSERT INTO mino_pages VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, 0, 0, $10)", + &[&int_id, &ulid, &page.updated, &page.created, &page.title, &(page.size.words as i32), &page.tags.into_iter().collect::>(), &page.names.into_iter().collect::>(), &page.content, &page.created]) + .await?; + for (key, value) in page.structured_data { + let (num, text) = match value { + minoteaur_types::Value::Number(x) => (Some(x), None), + minoteaur_types::Value::Text(t) => (None, Some(t)) + }; + tx.execute("INSERT INTO mino_structured_data (page, key, numeric_value, text_value) VALUES ($1, $2, $3, $4)", &[&int_id, &key, &num, &text]).await?; + } + for (_key, file) in page.files { + tx.execute("INSERT INTO mino_files (page, filename, size, timestamp, metadata) VALUES ($1, $2, $3, $4, $5)", &[&int_id, &file.filename, &(file.size as i32), &file.created, &tokio_postgres::types::Json(file.metadata)]).await?; + } + tx.commit().await?; + }, + // These should only occur after the page's record, with the exception of page creation. + minoteaur_types::Object::PageView(view) => { + if let Some((_updated_timestamp, last_view_timestamp)) = timestamps.get(&view.page) { + if *last_view_timestamp >= view.time { + return Ok(()); + } + } + let int_id = hash_str(&view.page.to_string()); + conn.execute("UPDATE mino_pages SET view_count = view_count + 1, last_view_timestamp = $2 WHERE id = $1", &[&int_id, &view.time]).await?; + }, + minoteaur_types::Object::Revision(rev) => { + // There's no separate "last revision timestamp" because revisions should always be associated with the updated field being adjusted. + if let Some((updated_timestamp, _last_view_timestamp)) = timestamps.get(&rev.page) { + if *updated_timestamp >= rev.time { + return Ok(()); + } + } + if let minoteaur_types::RevisionType::PageCreated = rev.ty { + return Ok(()); + } + let int_id = hash_str(&rev.page.to_string()); + conn.execute("UPDATE mino_pages SET revision_count = revision_count + 1 WHERE id = $1", &[&int_id]).await?; + } + } + Ok(()) + } +} diff --git a/src/indexers/rclwe.rs b/src/indexers/rclwe.rs index a5dc1c8..35f1eab 100644 --- a/src/indexers/rclwe.rs +++ b/src/indexers/rclwe.rs @@ -2,9 +2,11 @@ use std::{collections::HashMap, path::PathBuf, str::FromStr, sync::Arc}; use anyhow::{Context, Result}; use futures::pin_mut; use serde::{Deserialize, Serialize}; -use crate::{indexer::{ColumnSpec, Ctx, Indexer, TableSpec}, util::{hash_str, parse_html, systemtime_to_utc}}; +use crate::{indexer::{ColumnSpec, Ctx, Indexer, TableSpec}, util::{parse_html, systemtime_to_utc}}; use chrono::prelude::*; use tokio_postgres::{binary_copy::BinaryCopyInWriter, types::Type}; +use tracing::instrument; +use std::pin::Pin; #[derive(Serialize, Deserialize)] struct Config { @@ -82,7 +84,7 @@ CREATE TABLE webpages ( } async fn run(&self, ctx: Arc) -> Result<()> { - let conn = ctx.pool.get().await?; + let conn = ctx.pool.get().await?; let sink = conn.copy_in("COPY webpages (timestamp, url, title, content, html) FROM STDIN BINARY").await?; let input = PathBuf::from(&self.config.input); @@ -94,50 +96,7 @@ CREATE TABLE webpages ( pin_mut!(writer); while let Some(entry) = readdir.next_entry().await? { - if entry.file_type().await?.is_file() { - let path = entry.path(); - let name = path.file_name().unwrap().to_str().context("invalid path")?; - // for some reason there are several filename formats in storage - let mut metadata = None; - if name.starts_with("firefox-recoll-web") { - let meta_file = "_".to_owned() + name; - let mtime = systemtime_to_utc(entry.metadata().await?.modified()?)?; - if let Some(url) = tokio::fs::read_to_string(input.join(&meta_file)).await?.lines().next() { - metadata = Some((mtime, url.to_string())); - tokio::fs::rename(input.join(&meta_file), output.join(&meta_file)).await?; - } else { - log::warn!("Metadata error for {}", name); - } - } - if let Some(rem) = name.strip_suffix(".html") { - let meta_file = format!("{}.dic", rem); - let meta = parse_circache_meta(&tokio::fs::read_to_string(input.join(&meta_file)).await?).context("invalid metadata format")?; - let mtime = i64::from_str(meta.get("fmtime").context("invalid metadata format")?)?; - metadata = Some((DateTime::from_timestamp(mtime, 0).context("time is broken")?, meta.get("url").context("invalid metadata format")?.to_string())); - move_file(&input.join(&meta_file), output.join(&meta_file)).await?; - } - if let Some(rem) = name.strip_prefix("recoll-we-c") { - let meta_file = format!("recoll-we-m{}", rem); - let mtime = systemtime_to_utc(entry.metadata().await?.modified()?)?; - if tokio::fs::try_exists(input.join(&meta_file)).await? { - if let Some(url) = tokio::fs::read_to_string(input.join(&meta_file)).await?.lines().next() { - metadata = Some((mtime, url.to_string())); - move_file(&input.join(&meta_file), output.join(&meta_file)).await?; - } else { - log::warn!("Metadata error for {}", name); - } - } else { - log::warn!("No metadata for {}", name); - } - } - - if let Some((mtime, url)) = metadata { - let html = tokio::fs::read(&path).await?; - move_file(&path, output.join(name)).await?; - let (content, title) = parse_html(html.as_slice(), true); - writer.as_mut().write(&[&mtime, &url, &title.replace("\0", ""), &content.replace("\0", ""), &String::from_utf8_lossy(&html).replace("\0", "")]).await?; - } - } + RclweIndexer::process_entry(entry, &input, &output, &mut writer).await?; } writer.finish().await?; @@ -157,4 +116,57 @@ impl RclweIndexer { config: Arc::new(config) })) } -} \ No newline at end of file + + #[instrument(skip(writer))] + async fn process_entry(entry: tokio::fs::DirEntry, input: &PathBuf, output: &PathBuf, writer: &mut Pin<&mut BinaryCopyInWriter>) -> Result<()> { + if entry.file_type().await?.is_file() { + let path = entry.path(); + let name = path.file_name().unwrap().to_str().context("invalid path")?; + // for some reason there are several filename formats in storage + let mut metadata = None; + if name.starts_with("firefox-recoll-web") { + tracing::trace!("reading firefox-recoll-web"); + let meta_file = "_".to_owned() + name; + let mtime = systemtime_to_utc(entry.metadata().await?.modified()?)?; + if let Some(url) = tokio::fs::read_to_string(input.join(&meta_file)).await?.lines().next() { + metadata = Some((mtime, url.to_string())); + tokio::fs::rename(input.join(&meta_file), output.join(&meta_file)).await?; + } else { + tracing::warn!("Metadata error for {}", name); + } + } + if let Some(rem) = name.strip_suffix(".html") { + tracing::trace!("reading circache html"); + let meta_file = format!("{}.dic", rem); + let meta = parse_circache_meta(&tokio::fs::read_to_string(input.join(&meta_file)).await?).context("invalid metadata format")?; + let mtime = i64::from_str(meta.get("fmtime").context("invalid metadata format")?)?; + metadata = Some((DateTime::from_timestamp(mtime, 0).context("time is broken")?, meta.get("url").context("invalid metadata format")?.to_string())); + move_file(&input.join(&meta_file), output.join(&meta_file)).await?; + } + if let Some(rem) = name.strip_prefix("recoll-we-c") { + tracing::trace!("reading recoll-we-c"); + let meta_file = format!("recoll-we-m{}", rem); + let mtime = systemtime_to_utc(entry.metadata().await?.modified()?)?; + if tokio::fs::try_exists(input.join(&meta_file)).await? { + if let Some(url) = tokio::fs::read_to_string(input.join(&meta_file)).await?.lines().next() { + metadata = Some((mtime, url.to_string())); + move_file(&input.join(&meta_file), output.join(&meta_file)).await?; + } else { + tracing::warn!("Metadata error for {}", name); + } + } else { + tracing::warn!("No metadata for {}", name); + } + } + + if let Some((mtime, url)) = metadata { + let html = tokio::fs::read(&path).await?; + move_file(&path, output.join(name)).await?; + let (content, title) = parse_html(html.as_slice(), true); + writer.as_mut().write(&[&mtime, &url, &title.replace("\0", ""), &content.replace("\0", ""), &String::from_utf8_lossy(&html).replace("\0", "")]).await?; + } + } + + Ok(()) + } +} diff --git a/src/indexers/textfiles.rs b/src/indexers/textfiles.rs index 61bd839..7ab77af 100644 --- a/src/indexers/textfiles.rs +++ b/src/indexers/textfiles.rs @@ -6,13 +6,14 @@ use compact_str::CompactString; use futures::TryStreamExt; use serde::{Deserialize, Serialize}; use tokio::sync::RwLock; +use tracing::instrument; use crate::util::{hash_str, parse_html, parse_pdf, systemtime_to_utc, urlencode, CONFIG}; use crate::indexer::{Ctx, Indexer, TableSpec, delete_nonexistent_files, ColumnSpec}; use async_walkdir::WalkDir; use chrono::prelude::*; use regex::RegexSet; -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Debug)] struct Config { path: String, #[serde(default)] @@ -20,6 +21,7 @@ struct Config { base_url: String } +#[derive(Debug)] pub struct TextFilesIndexer { config: Config, ignore: RegexSet @@ -89,58 +91,11 @@ CREATE TABLE text_files ( let existing_files = Arc::new(RwLock::new(HashSet::new())); - entries.map_err(|e| anyhow::Error::from(e)).try_for_each_concurrent(Some(CONFIG.concurrency), |entry| + entries.map_err(|e| anyhow::Error::from(e)).try_for_each_concurrent(Some(CONFIG.concurrency), |entry| { let ctx = ctx.clone(); let existing_files = existing_files.clone(); - async move { - let real_path = entry.path(); - let path = if let Some(path) = real_path.strip_prefix(base_path)?.to_str() { - path - } else { - return Result::Ok(()); - }; - let ext = real_path.extension().and_then(OsStr::to_str); - if ignore.is_match(path) || !entry.file_type().await?.is_file() || !VALID_EXTENSIONS.contains(ext.unwrap_or_default()) { - return Ok(()); - } - let mut conn = ctx.pool.get().await?; - existing_files.write().await.insert(CompactString::from(path)); - let metadata = entry.metadata().await?; - let row = conn.query_opt("SELECT timestamp FROM text_files WHERE id = $1", &[&hash_str(path)]).await?; - let timestamp: DateTime = row.map(|r| r.get(0)).unwrap_or(DateTime::::MIN_UTC); - let modtime = systemtime_to_utc(metadata.modified()?)?; - if modtime > timestamp { - let parse = match ext { - Some("pdf") => { - parse_pdf(&real_path).await.map(Some) - }, - Some("txt") => { - let content = tokio::fs::read(&real_path).await?; - Ok(Some((String::from_utf8_lossy(&content).to_string(), String::new()))) - }, - Some("htm") | Some("html") | Some("xhtml") => { - let content = tokio::fs::read(&real_path).await?; - Ok(Some(tokio::task::block_in_place(|| parse_html(&content, true)))) - }, - _ => Ok(None), - }; - match parse { - Ok(None) => (), - Ok(Some((content, title))) => { - // Null bytes aren't legal in Postgres strings despite being valid UTF-8. - let tx = conn.transaction().await?; - tx.execute("DELETE FROM text_files WHERE id = $1", &[&hash_str(path)]).await?; - tx.execute("INSERT INTO text_files VALUES ($1, $2, $3, $4, $5)", - &[&hash_str(path), &path, &title.replace("\0", ""), &content.replace("\0", ""), &modtime]) - .await?; - tx.commit().await?; - }, - Err(e) => log::warn!("File parse for {}: {}", path, e) - } - } - Result::Ok(()) - } + TextFilesIndexer::process_file(entry, ctx, ignore, existing_files, base_path) }).await?; { @@ -164,4 +119,59 @@ impl TextFilesIndexer { config })) } -} \ No newline at end of file + + #[instrument(skip(ctx, ignore, existing_files, base_path))] + async fn process_file(entry: async_walkdir::DirEntry, ctx: Arc, ignore: &RegexSet, existing_files: Arc>>, base_path: &String) -> Result<()> { + let real_path = entry.path(); + let path = if let Some(path) = real_path.strip_prefix(base_path)?.to_str() { + path + } else { + return Result::Ok(()); + }; + let ext = real_path.extension().and_then(OsStr::to_str); + if ignore.is_match(path) || !entry.file_type().await?.is_file() || !VALID_EXTENSIONS.contains(ext.unwrap_or_default()) { + return Ok(()); + } + let mut conn = ctx.pool.get().await?; + existing_files.write().await.insert(CompactString::from(path)); + let metadata = entry.metadata().await?; + let row = conn.query_opt("SELECT timestamp FROM text_files WHERE id = $1", &[&hash_str(path)]).await?; + let timestamp: DateTime = row.map(|r| r.get(0)).unwrap_or(DateTime::::MIN_UTC); + let modtime = systemtime_to_utc(metadata.modified()?)?; + if modtime > timestamp { + let parse = TextFilesIndexer::read_file(&real_path, ext).await; + match parse { + Ok(None) => (), + Ok(Some((content, title))) => { + // Null bytes aren't legal in Postgres strings despite being valid UTF-8. + let tx = conn.transaction().await?; + tx.execute("DELETE FROM text_files WHERE id = $1", &[&hash_str(path)]).await?; + tx.execute("INSERT INTO text_files VALUES ($1, $2, $3, $4, $5)", + &[&hash_str(path), &path, &title.replace("\0", ""), &content.replace("\0", ""), &modtime]) + .await?; + tx.commit().await?; + }, + Err(e) => tracing::warn!("File parse for {}: {}", path, e) + } + } + Ok(()) + } + + #[instrument] + async fn read_file(path: &std::path::PathBuf, ext: Option<&str>) -> Result> { + match ext { + Some("pdf") => { + parse_pdf(&path).await.map(Some) + }, + Some("txt") => { + let content = tokio::fs::read(&path).await?; + Ok(Some((String::from_utf8_lossy(&content).to_string(), String::new()))) + }, + Some("htm") | Some("html") | Some("xhtml") => { + let content = tokio::fs::read(&path).await?; + Ok(Some(tokio::task::block_in_place(|| parse_html(&content, true)))) + }, + _ => Ok(None), + } + } +} diff --git a/src/indexers/thunderbird_email.rs b/src/indexers/thunderbird_email.rs index 2f5dc0f..3259c9e 100644 --- a/src/indexers/thunderbird_email.rs +++ b/src/indexers/thunderbird_email.rs @@ -12,6 +12,8 @@ use chrono::prelude::*; use tokio::{fs::File, io::{AsyncBufReadExt, BufReader}}; use mail_parser::MessageParser; use std::future::Future; +use compact_str::CompactString; +use tracing::instrument; #[derive(Serialize, Deserialize)] struct Config { @@ -47,6 +49,7 @@ lazy_static::lazy_static! { static ref MULTIPART_REGEX: regex::bytes::Regex = regex::bytes::RegexBuilder::new(r#"content-type:\s*multipart/(mixed|alternative);\s*boundary="?([A-Za-z0-9=_-]+)"?;?\r\n$"#).case_insensitive(true).build().unwrap(); } +#[instrument(skip(callback))] async fn read_mbox>, F: FnMut(Email) -> U>(path: &PathBuf, mut callback: F) -> Result<()> { let input = File::open(path).await?; let mut buf = Vec::new(); @@ -99,7 +102,7 @@ async fn read_mbox>, F: FnMut(Email) -> U>(path: &Pa current_delim = None; } } - + if current_delim.is_none() && line.starts_with(b"From ") && !buf.is_empty() { if let Ok(Some(mail)) = parse_current(&mut buf) { callback(mail).await?; @@ -153,7 +156,7 @@ CREATE TABLE IF NOT EXISTS emails ( ColumnSpec { name: "subject", fts: true, - fts_short: false, + fts_short: true, trigram: true, is_array: false }, @@ -179,42 +182,14 @@ CREATE TABLE IF NOT EXISTS emails ( while let Some(entry) = entries.try_next().await? { let path = entry.path(); let mbox = path.file_stem().and_then(|x| x.to_str()).context("invalid path")?.to_compact_string(); - let folder = path.parent().unwrap().file_name().unwrap().to_str().unwrap(); - if let Some(account) = config.account_mapping.get(folder) { + let folder = path.parent().unwrap().file_name().unwrap().to_str().unwrap().to_compact_string(); + if let Some(account) = config.account_mapping.get(&*folder) { let account = account.to_compact_string(); let ext = path.extension(); if let None = ext { if !self.config.ignore_mboxes.contains(mbox.as_str()) { let ctx = ctx.clone(); - js.spawn(async move { - read_mbox(&path, move |mail| { - let ctx = ctx.clone(); - let mbox = mbox.trim_end_matches("-1").to_compact_string(); - let account = account.clone(); - async move { - let conn = ctx.pool.get().await?; - let id = hash_thing(&mail.raw.as_slice()); - let body = match mail.body { - Body::Plain(t) => t, - Body::Html(h) => parse_html(h.as_bytes(), false).0 - }; - conn.execute(r#"INSERT INTO emails VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) ON CONFLICT (id) DO UPDATE SET box = $7"#, &[ - &id, - &mail.message_id, - &mail.reply_to, - &mail.date, - &mail.raw, - &account.as_str(), - &mbox.as_str(), - &mail.from, - &mail.from_address, - &mail.subject, - &body - ]).await?; - Ok(()) - } - }).await - }); + js.spawn(EmailIndexer::process_mbox(ctx.clone(), path.clone(), mbox, folder, account.clone())); } } } @@ -239,4 +214,36 @@ impl EmailIndexer { config: Arc::new(config) })) } -} \ No newline at end of file + + #[instrument(skip(ctx))] + async fn process_mbox(ctx: Arc, path: PathBuf, mbox: CompactString, folder: CompactString, account: CompactString) -> Result<()> { + tracing::trace!("reading mailbox"); + read_mbox(&path, move |mail| { + let ctx = ctx.clone(); + let account = account.clone(); + let mbox = mbox.trim_end_matches("-1").to_compact_string(); + async move { + let conn = ctx.pool.get().await?; + let id = hash_thing(&mail.raw.as_slice()); + let body = match mail.body { + Body::Plain(t) => t, + Body::Html(h) => parse_html(h.as_bytes(), false).0 + }; + conn.execute(r#"INSERT INTO emails VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) ON CONFLICT (id) DO UPDATE SET box = $7"#, &[ + &id, + &mail.message_id, + &mail.reply_to, + &mail.date, + &mail.raw, + &account.as_str(), + &mbox.as_str(), + &mail.from, + &mail.from_address, + &mail.subject, + &body + ]).await?; + Ok(()) + } + }).await + } +} diff --git a/src/main.rs b/src/main.rs index 049eaa6..649b6e2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,12 @@ use chrono::{DateTime, Utc}; use compact_str::{CompactString, ToCompactString}; use deadpool_postgres::{Manager, ManagerConfig, RecyclingMethod, Pool}; -use futures::{StreamExt, TryFutureExt}; use indexer::{ColumnSpec, TableSpec}; use pgvector::HalfVector; use semantic::SemanticCtx; -use tokio::signal::unix::{signal, SignalKind}; use tokio_postgres::{NoTls, Row}; use anyhow::{Context, Result}; +use tracing::Instrument; use util::{get_column_string, urlencode}; use std::collections::{BTreeMap, HashMap}; use std::{str::FromStr, sync::Arc, fmt::Write}; @@ -16,6 +15,7 @@ use maud::{html, Markup, Render, DOCTYPE}; use serde::{Deserialize, Serialize}; use rs_abbreviation_number::NumericAbbreviate; use sea_query_postgres::PostgresBinder; +use tracing_subscriber::prelude::*; mod indexer; mod indexers; @@ -40,7 +40,7 @@ CREATE FUNCTION {name}_track() RETURNS trigger AS $$ BEGIN IF NEW IS DISTINCT FROM OLD THEN INSERT INTO {name}_change_tracker VALUES ( - new.id, + new.id, CURRENT_TIMESTAMP ) ON CONFLICT (id) DO UPDATE SET timestamp = CURRENT_TIMESTAMP; @@ -671,7 +671,7 @@ async fn fts_page(state: web::types::State, query: web::types::Quer let prefixed = Arc::new(prefixed); let unprefixed = Arc::new(unprefixed); let mut results = HashMap::new(); - + let mut set = tokio::task::JoinSet::new(); for ix in state.indexers.iter() { for table in ix.tables() { @@ -771,7 +771,9 @@ impl ServerState { #[tokio::main] async fn main() -> Result<()> { - pretty_env_logger::init(); + console_subscriber::ConsoleLayer::builder() + .with_default_env() + .init(); let pg_config = tokio_postgres::Config::from_str(&CONFIG.database)?; let mgr_config = ManagerConfig { recycling_method: RecyclingMethod::Fast }; @@ -779,12 +781,12 @@ async fn main() -> Result<()> { let pool = Pool::builder(mgr).max_size(20).build()?; let indexers: Arc>> = Arc::new(vec![ + indexers::mediafiles::MediaFilesIndexer::new(CONFIG.indexers["media_files"].clone()).await?, indexers::firefox_history_dump::FirefoxHistoryDumpIndexer::new(CONFIG.indexers["browser_history"].clone()).await?, indexers::rclwe::RclweIndexer::new(CONFIG.indexers["rclwe"].clone()).await?, indexers::atuin::AtuinIndexer::new(CONFIG.indexers["atuin"].clone()).await?, indexers::miniflux::MinifluxIndexer::new(CONFIG.indexers["miniflux"].clone()).await?, indexers::thunderbird_email::EmailIndexer::new(CONFIG.indexers["email"].clone()).await?, - indexers::mediafiles::MediaFilesIndexer::new(CONFIG.indexers["media_files"].clone()).await?, indexers::books::BooksIndexer::new(CONFIG.indexers["books"].clone()).await?, indexers::textfiles::TextFilesIndexer::new(CONFIG.indexers["text_files"].clone()).await?, indexers::anki::AnkiIndexer::new(CONFIG.indexers["anki"].clone()).await?, @@ -805,7 +807,7 @@ async fn main() -> Result<()> { for (index, sql) in indexer.schemas().iter().enumerate() { let index = index as i32; if index >= version { - log::info!("Migrating {} to {}.", name, index); + tracing::info!("Migrating {} to {}.", name, index); let tx = conn.transaction().await?; tx.batch_execute(*sql).await.context("execute migration")?; tx.execute("INSERT INTO versions VALUES ($1, $2) ON CONFLICT (indexer) DO UPDATE SET version = $2", &[&name, &(index + 1)]).await.context("update migrations database")?; @@ -825,11 +827,10 @@ async fn main() -> Result<()> { let ctx = Arc::new(indexer::Ctx { pool: pool.clone() }); - log::info!("Indexing: {}.", indexer.name()); - indexer.run(ctx).await.context(indexer.name())?; - log::info!("Building FTS index: {}.", indexer.name()); + tracing::info!("indexing {}", indexer.name()); + indexer.run(ctx).instrument(tracing::info_span!("index", indexer = indexer.name())).await.context(indexer.name())?; + tracing::info!("FTS indexing {}", indexer.name()); semantic::fts_for_indexer(indexer, sctx.clone()).await?; - log::info!("Done: {}.", indexer.name()) } Ok(()) }, @@ -856,7 +857,46 @@ async fn main() -> Result<()> { Ok(()) }).context("init fail") }) - } + }, + "sql" => { + println!("delete semantic indices:"); + for indexer in indexers.iter() { + for table in indexer.tables() { + for column in table.columns { + if column.fts { + println!("DELETE FROM {}_{}_fts_chunks;", table.name, column.name); + println!("DROP INDEX IF EXISTS {}_{}_fts_chunks_embedding_idx;", table.name, column.name); + } + } + + println!("DELETE FROM {}_change_tracker;", table.name); + println!("INSERT INTO {}_change_tracker SELECT id, CURRENT_TIMESTAMP FROM {};", table.name, table.name); + } + } + + println!("create semantic indices:"); + for indexer in indexers.iter() { + for table in indexer.tables() { + for column in table.columns { + if column.fts { + println!("CREATE INDEX {}_{}_fts_chunks_embedding_idx ON {}_{}_fts_chunks USING hnsw (embedding halfvec_ip_ops);", table.name, column.name, table.name, column.name); + } + } + } + } + + println!("create document index:"); + for indexer in indexers.iter() { + for table in indexer.tables() { + for column in table.columns { + if column.fts { + println!("CREATE INDEX {}_{}_fts_chunks_document_idx ON {}_{}_fts_chunks (document);", table.name, column.name, table.name, column.name); + } + } + } + } + Ok(()) + }, _ => Ok(()) } -} \ No newline at end of file +} diff --git a/src/semantic.rs b/src/semantic.rs index b7691c4..5d5d7e7 100644 --- a/src/semantic.rs +++ b/src/semantic.rs @@ -1,5 +1,6 @@ use std::{collections::HashMap, sync::Arc}; use futures::{StreamExt, TryStreamExt}; +use lazy_static::lazy_static; use pgvector::HalfVector; use tokenizers::{tokenizer::{Error, Tokenizer}, Encoding}; use anyhow::{Result, anyhow}; @@ -9,8 +10,15 @@ use tokio::sync::{mpsc, RwLock}; use tokio_stream::wrappers::ReceiverStream; use std::fmt::Write; use half::f16; +use tracing::instrument; -use crate::{indexer::{ColumnSpec, Indexer, TableSpec}, util::{get_column_string, CONFIG}}; +use crate::{indexer::{ColumnSpec, Indexer, TableSpec}, util::{self, get_column_string, CONFIG}}; + +// sorry. +// https://gist.github.com/hanxiao/3f60354cf6dc5ac698bc9154163b4e6a +lazy_static! { + static ref CHUNKING_REGEX: pcre2::bytes::Regex = pcre2::bytes::RegexBuilder::new().utf(true).jit_if_available(true).multi_line(true).build(r#"((?:^(?:[#*=-]{1,7}|\w[^\r\n]{0,200}\r?\n[-=]{2,200}|]{0,100}>)[^\r\n]{1,200}(?:<\/h[1-6]>)?(?:\r?\n|$))|(?:\[[0-9]+\][^\r\n]{1,800})|(?:(?:^|\r?\n)[ \t]{0,3}(?:[-*+•]|\d{1,3}\.\w\.|\[[ xX]\])[ \t]+(?![.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]\s)(?:[^\r\n]{1,200}(?:(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)|(?=[\r\n]|$))|[^\r\n]{1,200}(?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]|(?:'(?=`)|''(?=``)))(?:(?:(?!(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)).){1,100}(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$))?)[\s\]})>,']*(?:(?:\r?\n[ \t]{2,5}(?:[-*+•]|\d{1,3}\.\w\.|\[[ xX]\])[ \t]+(?![.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]\s)(?:[^\r\n]{1,200}(?:(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)|(?=[\r\n]|$))|[^\r\n]{1,200}(?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]|(?:'(?=`)|''(?=``)))(?:(?:(?!(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)).){1,100}(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$))?)[\s\]})>,']*){0,6}(?:\r?\n[ \t]{4,7}(?:[-*+•]|\d{1,3}\.\w\.|\[[ xX]\])[ \t]+(?![.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]\s)(?:[^\r\n]{1,200}(?:(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)|(?=[\r\n]|$))|[^\r\n]{1,200}(?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]|(?:'(?=`)|''(?=``)))(?:(?:(?!(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)).){1,100}(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$))?)[\s\]})>,']*){0,6})?)|(?:(?:^>(?:>|\s{2,}){0,2}(?![.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]\s)(?:[^\r\n]{1,200}(?:(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)|(?=[\r\n]|$))|[^\r\n]{1,200}(?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]|(?:'(?=`)|''(?=``)))(?:(?:(?!(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)).){1,100}(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$))?)[\s\]})>,']*\r?\n?){1,15})|(?:(?:^|\r?\n)(?:```|~~~)(?:\w{0,20})?\r?\n[\s\S]{0,1500}?(?:```|~~~)\r?\n?|(?:(?:^|\r?\n)(?: {4}|\t)[^\r\n]{0,200}(?:\r?\n(?: {4}|\t)[^\r\n]{0,200}){0,20}\r?\n?)|(?:
(?:)?[\s\S]{0,1500}?(?:<\/code>)?<\/pre>))|(?:(?:^|\r?\n)(?:\|[^\r\n]{0,200}\|(?:\r?\n\|[-:]{1,200}\|){0,1}(?:\r?\n\|[^\r\n]{0,200}\|){0,20}|[\s\S]{0,2000}?<\/table>))|(?:^(?:[-*_]){3,}\s*$|)|(?![\s\]})>,'])(?:^(?:<[a-zA-Z][^>]{0,100}>)?(?![.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]\s)(?:[^\r\n]{1,800}(?:(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)|(?=[\r\n]|$))|[^\r\n]{1,800}(?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]|(?:'(?=`)|''(?=``)))(?:(?:(?!(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)).){1,100}(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$))?)[\s\]})>,']*(?:<\/[a-zA-Z]+>)?(?:\r?\n|$))|(?![\s\]})>,'])(?![.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]\s)(?:[^\r\n]{1,400}(?:(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)|(?=[\r\n]|$))|[^\r\n]{1,400}(?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]|(?:'(?=`)|''(?=``)))(?:(?:(?!(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)).){1,100}(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$))?)[\s\]})>,']*|(?:(?,'])(?:(?:^|\r?\n\r?\n)(?:

)?(?![.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]\s)(?:[^\r\n]{1,1000}(?:(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)|(?=[\r\n]|$))|[^\r\n]{1,1000}(?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]|(?:'(?=`)|''(?=``)))(?:(?:(?!(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)).){1,100}(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$))?)[\s\]})>,']*(?:<\/p>)?(?=\r?\n\r?\n|$))|(?:<[a-zA-Z][^>]{0,100}(?:>[\s\S]{0,1000}?<\/[a-zA-Z]+>|\s*\/>))|(?:(?:\$\$[\s\S]{0,500}?\$\$)|(?:\$[^\$\r\n]{0,100}\$))|(?![\s\]})>,'])(?![.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]\s)(?:[^\r\n]{1,800}(?:(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)|(?=[\r\n]|$))|[^\r\n]{1,800}(?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]|(?:'(?=`)|''(?=``)))(?:(?:(?!(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$)).){1,100}(?:[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}](?,'](?=[.!?…]|\.{3}|[\x{2026}\x{2047}-\x{2049}]|[\p{Emoji_Presentation}\p{Extended_Pictographic}]))|(?:'(?=`)|''(?=``)))(?=\S|$))?)[\s\]})>,']*)"#).unwrap(); +} #[derive(Deserialize, Serialize, Clone, Debug)] pub struct SemanticSearchConfig { @@ -31,24 +39,52 @@ pub fn load_tokenizer() -> Result { Ok(tokenizer) } +#[instrument(skip(t))] pub fn tokenize_chunk_text(t: &Tokenizer, s: &str) -> Result> { - let enc = t.encode(s, false).map_err(convert_tokenizer_error)?; let mut result = vec![]; - let mut write = |enc: &Encoding| -> Result<()> { - if !enc.get_offsets().is_empty() { - let offsets: Vec<(usize, usize)> = enc.get_offsets().into_iter().copied().filter(|(a, b)| *a != 0 || *b != 0).collect(); - result.push(( - offsets[0].0, - offsets[offsets.len() - 1].1, - t.decode(enc.get_ids(), true).map_err(convert_tokenizer_error)? - )); - } - Ok(()) - }; - write(&enc)?; - for overflowing in enc.get_overflowing() { - write(overflowing)?; + + let mut raw_regions = vec![]; + + // use inscrutable semantic chunking regex, then use tokenizer to split in case some chunks are too long still + for mat in CHUNKING_REGEX.find_iter(s.as_bytes()) { + let mat = mat?; + let range = mat.start()..mat.end(); + let region = &s[range]; + raw_regions.push((mat.start(), t.encode(region, false).map_err(convert_tokenizer_error)?.len())); } + + raw_regions.push((s.len(), CONFIG.semantic.max_tokens)); + + let mut new_chunk_start = 0; + let mut new_chunk_token_length = 0; + + for ((_region_start, token_length), (next_region_start, next_token_length)) in raw_regions.iter().zip(raw_regions.iter().skip(1)) { + new_chunk_token_length += token_length; + if new_chunk_token_length + next_token_length > util::CONFIG.semantic.max_tokens { + let enc = t.encode(&s[new_chunk_start..*next_region_start], false).map_err(convert_tokenizer_error)?; + + let mut write = |enc: &Encoding| -> Result<()> { + if !enc.get_offsets().is_empty() { + let offsets: Vec<(usize, usize)> = enc.get_offsets().into_iter().copied().filter(|(a, b)| *a != 0 || *b != 0).collect(); + result.push(( + new_chunk_start + offsets[0].0, + new_chunk_start + offsets[offsets.len() - 1].1, + t.decode(enc.get_ids(), true).map_err(convert_tokenizer_error)? + )); + } + Ok(()) + }; + + write(&enc)?; + for overflowing in enc.get_overflowing() { + write(overflowing)?; + } + + new_chunk_start = *next_region_start; + new_chunk_token_length = 0; + } + } + Ok(result) } @@ -66,6 +102,7 @@ fn decode_fp16_buffer(buf: &[u8]) -> Vec { .collect() } +#[instrument(skip(client))] async fn send_batch(client: &Client, batch: Vec<&str>) -> Result>> { let res = client.post(&CONFIG.semantic.backend) .body(rmp_serde::to_vec_named(&EmbeddingRequest { text: batch })?) @@ -84,6 +121,7 @@ struct Chunk { text: String } +#[derive(Debug)] pub struct SemanticCtx { tokenizer: Tokenizer, client: reqwest::Client, @@ -98,6 +136,7 @@ impl SemanticCtx { // This is only called when we have all chunks for a document ready, so we delete the change record // and all associated FTS chunks. +#[instrument(skip(table, ctx, chunks))] async fn insert_fts_chunks(id: i64, chunks: Vec<(Chunk, Vec)>, table: &TableSpec, ctx: Arc) -> Result<()> { let mut conn = ctx.pool.get().await?; let tx = conn.transaction().await?; @@ -127,6 +166,7 @@ pub async fn embed_query(q: &str, ctx: Arc) -> Result<(HalfVector, Ok((HalfVector::from(result.next().unwrap()), HalfVector::from(result.next().unwrap()))) } +#[instrument(skip(ctx))] pub async fn fts_for_indexer(i: &Box, ctx: Arc) -> Result<()> { let conn = ctx.pool.get().await?; for table in i.tables() { @@ -163,7 +203,10 @@ pub async fn fts_for_indexer(i: &Box, ctx: Arc) -> Res for (i, col) in fts_columns.iter().enumerate() { let s: Option = get_column_string(&row, i + 1, col); if let Some(s) = s { - let chunks = tokio::task::block_in_place(|| tokenize_chunk_text(&ctx.tokenizer, &s))?; + let ctx = ctx.clone(); + let chunks = tokio::task::spawn_blocking(move || { + tokenize_chunk_text(&ctx.tokenizer, &s) + }).await??; for chunk in chunks { buffer.push(Chunk { id, @@ -227,5 +270,5 @@ pub async fn fts_for_indexer(i: &Box, ctx: Arc) -> Res #[test] fn test_tokenize() { - println!("{:?}", tokenize_chunk_text(&load_tokenizer().unwrap(), "test input")); -} \ No newline at end of file + println!("{:?}", tokenize_chunk_text(&load_tokenizer().unwrap(), include_str!("../tokenizer_test_input.txt"))); +} diff --git a/src/util.rs b/src/util.rs index dd6cbb9..dfe864a 100644 --- a/src/util.rs +++ b/src/util.rs @@ -6,6 +6,7 @@ use seahash::SeaHasher; use serde::{Serialize, Deserialize}; use tokio_postgres::Row; use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; +use tracing::instrument; const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); @@ -57,6 +58,7 @@ lazy_static::lazy_static! { static ref SPACE_ON_NEWLINES: Regex = Regex::new(r"\n\s+").unwrap(); } +#[instrument(skip(html))] pub fn parse_html(html: &[u8], prefer_title_tag: bool) -> (String, String) { use html5gum::Token; @@ -135,6 +137,7 @@ pub fn parse_html(html: &[u8], prefer_title_tag: bool) -> (String, String) { (NEWLINES.replace_all(&SPACE_ON_NEWLINES.replace_all(&text, "\n"), "\n\n").trim().to_string(), title) } +#[instrument] pub async fn parse_pdf(path: &PathBuf) -> Result<(String, String)> { // Rust does not seem to have a robust library for this. let res = tokio::process::Command::new("pdftotext") @@ -175,4 +178,4 @@ pub fn get_column_string(row: &Row, index: usize, spec: &ColumnSpec) -> Option String { utf8_percent_encode(s, FRAGMENT).to_string() -} \ No newline at end of file +}