From fac7a4d5ac4c5acafda9f97851807e35d57c4154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dawid=20Ci=C4=99=C5=BCarkiewicz?= Date: Sun, 10 May 2020 23:59:43 -0700 Subject: [PATCH] Compact results --- src/index.rs | 56 +++++++++++++++++++++++++++++++++++++++++++++++++--- src/main.rs | 16 ++++++++++++--- src/page.rs | 5 +++-- 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/src/index.rs b/src/index.rs index 9f0d57d..5154bcb 100644 --- a/src/index.rs +++ b/src/index.rs @@ -6,24 +6,30 @@ use async_trait::async_trait; use log::info; use std::collections::{HashMap, HashSet}; +/// Indexing wrapper over `page::Store` +/// +/// `Index` keeps track of page data necessary +/// to quickly look them up by a tag query. #[derive(Default)] pub struct Index { page_ids_by_tag: HashMap>, - tags_by_page_id: HashMap>, + tags_by_page_id: HashMap>, title_by_page_id: HashMap, store: T, } +/// Basic page info #[derive(Debug, Clone)] pub struct PageInfo { pub id: Id, pub title: String, } +/// Results of tag query lookup #[derive(Default, Debug, Clone)] pub struct FindResults { pub matching_pages: Vec, - pub matching_tags: Vec, + pub matching_tags: Vec, } impl FindResults { @@ -32,6 +38,14 @@ impl FindResults { } } +/// More compact (post-processed) `FindResults` +pub struct CompactResults { + // all tags that were not already filtered on + pub tags: Vec<(Tag, usize)>, + // all pages that can't be reached by one of the `tags` + pub pages: Vec, +} + impl Index where T: page::StoreMut, @@ -49,6 +63,7 @@ where Ok(s) } + /// Index the inner `Store` async fn index_inner(&mut self) -> Result<()> { let mut count = 0; let ids = self.store.iter().await?.collect::>(); @@ -60,9 +75,44 @@ where info!("Indexed {} pages", count); Ok(()) } + + /// Compact the results to a shorter form + pub fn compact_results(&self, results: FindResults) -> CompactResults { + let matching_tags: HashSet = results.matching_tags.iter().cloned().collect(); + let mut unmatched_tags: HashMap = Default::default(); 
for page_info in &results.matching_pages { + for page_tag in &self.tags_by_page_id[&page_info.id] { + if !matching_tags.contains(page_tag.as_str()) { + *unmatched_tags.entry(page_tag.to_owned()).or_default() += 1; + } + } + } + + let unmatched_tags_set: HashSet = unmatched_tags.keys().cloned().collect(); + + let mut pages: Vec = results + .matching_pages + .into_iter() + .filter(|page_info| { + unmatched_tags_set + .intersection(&self.tags_by_page_id[&page_info.id]) + .next() + .is_none() + }) + .collect(); + + pages.sort_by(|a, b| a.title.cmp(&b.title)); + + let mut tags: Vec<_> = unmatched_tags.into_iter().collect(); + + tags.sort_by(|a, b| a.1.cmp(&b.1).reverse().then_with(|| a.0.cmp(&b.0))); + + CompactResults { tags, pages } + } } impl Index { + /// Lookup pages with a list of tags pub fn find(&self, tags: &[TagRef]) -> FindResults { let mut matching_pages: Vec = vec![]; let mut matching_tags: Vec = vec![]; @@ -146,7 +196,7 @@ impl Index { .tags_by_page_id .get(&id) .cloned() - .unwrap_or_else(|| vec![]) + .unwrap_or_else(|| HashSet::new()) { self.page_ids_by_tag .get_mut(&tag) diff --git a/src/main.rs b/src/main.rs index 209562c..a872348 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,7 @@ use warp::{path::FullPath, Filter}; use serde_derive::Deserialize; -use page::StoreMut; +use page::{StoreMut, Tag}; /// Command line options mod cli; @@ -157,7 +157,10 @@ fn render_page_view(page: &page::Parsed) -> impl RenderOnce { } } -fn render_post_list(posts: impl Iterator + 'static) -> impl RenderOnce { +fn render_post_list( + unmatched_tags: impl Iterator, + posts: impl Iterator + 'static, +) -> impl RenderOnce { owned_html! 
{ div(class="pure-menu pure-menu-horizontal") { form(action="..", method="get", class="pure-menu-item") { @@ -173,6 +176,11 @@ fn render_post_list(posts: impl Iterator + 'static) -> i } } ul { + @ for tag in unmatched_tags { + li { + a(href=format!("./{}", tag.0)) : format!("{} ({})", tag.0, tag.1) + } + } @ for post in posts { li { a(href=format!("?id={}", post.id)) : post.title @@ -358,8 +366,10 @@ async fn handle_get( query.edit.is_some(), )))) } else { + let compact_results = read.compact_results(results); Ok(warp_reply_from_render(render_html_page(render_post_list( - results.matching_pages.into_iter(), + compact_results.tags.into_iter(), + compact_results.pages.into_iter(), )))) } } diff --git a/src/page.rs b/src/page.rs index 61c9b82..36a15d5 100644 --- a/src/page.rs +++ b/src/page.rs @@ -3,6 +3,7 @@ pub mod store; #[allow(unused)] use anyhow::Result; use lazy_static::lazy_static; +use std::collections::HashSet; pub use store::{InMemoryStore, Store, StoreMut}; use digest::Digest; @@ -23,7 +24,7 @@ pub struct Parsed { pub source_body: String, pub html: String, pub headers: Headers, - pub tags: Vec, + pub tags: HashSet, pub title: String, } @@ -165,7 +166,7 @@ impl Parsed { html: html_output, source_body: body, source: Source(source), - tags, + tags: tags.into_iter().collect(), title, } }