Mirror of https://github.com/osmarks/website, synced 2025-09-08 13:25:58 +00:00
new post and tweaks
links_cache.json: 120 lines changed
@@ -4330,5 +4330,125 @@
        "date": null,
        "website": "GitHub",
        "auto": true
    },
    "https://en.wikipedia.org/wiki/List_of_tallest_buildings_and_structures_in_London": {
        "excerpt": "City of London skyline in 2024",
        "title": "List of tallest buildings and structures in London",
        "author": "Contributors to Wikimedia projects",
        "date": "2003-02-07T14:52:09Z",
        "website": "Wikimedia Foundation, Inc.",
        "auto": true
    },
    "https://www.london.gov.uk/who-we-are/what-london-assembly-does/questions-mayor/find-an-answer/major-road-network": {
        "excerpt": "Questions to the Mayor: Please provide details of the roads in London that form part of the Major Road Network (MRN). Please include the name of the road, the length of the road within London, which highway authority is responsible for the road and the percentage of the roads in London that form part of the MRN?",
        "title": "Major Road Network",
        "author": null,
        "date": null,
        "website": "London City Hall",
        "auto": true
    },
    "https://www.london.gov.uk/who-we-are/what-london-assembly-does/questions-mayor/find-an-answer/average-distance-travelled-person-mode-london": {
        "excerpt": "Questions to the Mayor: Using data showing travel in London by mode of transport, could you provide an estimate of the average distance in kilometres travelled per person by mode in each year since 2016?",
        "title": "Average distance travelled by person per mode in London",
        "author": null,
        "date": null,
        "website": "London City Hall",
        "auto": true
    },
    "https://www.businessinsider.com/the-8-fastest-elevators-in-the-world-2013-1?op=1": {
        "excerpt": "Check out the eight fastest elevators worldwide, showcasing engineering marvels that redefine vertical transportation.",
        "title": "Asian Skyscrapers Dominate A New List Of The World's Fastest Elevators",
        "author": "Megan Willett-Wei",
        "date": "2013-01-23T17:07:46Z",
        "website": "Business Insider",
        "auto": true
    },
    "https://www.wired.com/story/thyssenkrupp-multi-maglev-elevator/": {
        "excerpt": "ThyssenKrupp's Multi elevator that can travel horizontally, diagonally as well as vertically",
        "title": "The Wonkavator is real: ThyssenKrupp unveils its maglev elevator that 'runs like the Tube'",
        "author": "Bonnie Christian",
        "date": "2017-06-22T06:00:01.000-04:00",
        "website": "WIRED",
        "auto": true
    },
    "https://vitalik.eth.limo/general/2023/04/14/traveltime.html": {
        "excerpt": "Dark Mode Toggle",
        "title": "Travel time ~= 750 * distance ^ 0.6",
        "author": null,
        "date": null,
        "website": null,
        "auto": true
    },
    "https://idlewords.com/talks/fan_is_a_tool_using_animal.htm": {
        "excerpt": "In 1967, Gene Roddenberry launched a TV show that had a massive cultural impact. While it wasn’t a hit during its original run, it kindled the imagination in a way few other television programs had. The story of an attractive, pan-ethnic crew roaming the galaxy, solving moral dilemmas in tight uniforms, had a powerful appeal.",
        "title": "Fan Is A Tool-Using Animal—dConstruct Conference Talk",
        "author": null,
        "date": null,
        "website": null,
        "auto": true
    },
    "https://arxiv.org/abs/2212.10496": {
        "excerpt": "While dense retrieval has been shown effective and efficient across tasks and languages, it remains difficult to create effective fully zero-shot dense retrieval systems when no relevance label is available. In this paper, we recognize the difficulty of zero-shot learning and encoding relevance. Instead, we propose to pivot through Hypothetical Document Embeddings~(HyDE). Given a query, HyDE first zero-shot instructs an instruction-following language model (e.g. InstructGPT) to generate a hypothetical document. The document captures relevance patterns but is unreal and may contain false details. Then, an unsupervised contrastively learned encoder~(e.g. Contriever) encodes the document into an embedding vector. This vector identifies a neighborhood in the corpus embedding space, where similar real documents are retrieved based on vector similarity. This second step ground the generated document to the actual corpus, with the encoder's dense bottleneck filtering out the incorrect details. Our experiments show that HyDE significantly outperforms the state-of-the-art unsupervised dense retriever Contriever and shows strong performance comparable to fine-tuned retrievers, across various tasks (e.g. web search, QA, fact verification) and languages~(e.g. sw, ko, ja).",
        "title": "Precise Zero-Shot Dense Retrieval without Relevance Labels",
        "author": "[Submitted on 20 Dec 2022]",
        "date": null,
        "website": "arXiv.org",
        "auto": true
    },
    "https://arxiv.org/abs/2204.10628": {
        "excerpt": "Knowledge-intensive language tasks require NLP systems to both provide the correct answer and retrieve supporting evidence for it in a given corpus. Autoregressive language models are emerging as the de-facto standard for generating answers, with newer and more powerful systems emerging at an astonishing pace. In this paper we argue that all this (and future) progress can be directly applied to the retrieval problem with minimal intervention to the models' architecture. Previous work has explored ways to partition the search space into hierarchical structures and retrieve documents by autoregressively generating their unique identifier. In this work we propose an alternative that doesn't force any structure in the search space: using all ngrams in a passage as its possible identifiers. This setup allows us to use an autoregressive model to generate and score distinctive ngrams, that are then mapped to full passages through an efficient data structure. Empirically, we show this not only outperforms prior autoregressive approaches but also leads to an average improvement of at least 10 points over more established retrieval solutions for passage-level retrieval on the KILT benchmark, establishing new state-of-the-art downstream performance on some datasets, while using a considerably lighter memory footprint than competing systems. Code and pre-trained models at https://github.com/facebookresearch/SEAL.",
        "title": "Autoregressive Search Engines: Generating Substrings as Document Identifiers",
        "author": "[Submitted on 22 Apr 2022]",
        "date": null,
        "website": "arXiv.org",
        "auto": true
    },
    "https://github.com/amoffat/supertag": {
        "excerpt": "A tag-based filesystem. Contribute to amoffat/supertag development by creating an account on GitHub.",
        "title": "GitHub - amoffat/supertag: A tag-based filesystem",
        "author": "amoffat",
        "date": null,
        "website": "GitHub",
        "auto": true
    },
    "https://gwern.net/design": {
        "excerpt": "Meta page describing Gwern.net, the self-documenting website’s implementation and experiments for better ‘semantic zoom’ of hypertext; technical decisions using Markdown and static hosting.",
        "title": "Design Of This Website",
        "author": "Gwern",
        "date": null,
        "website": null,
        "auto": true
    },
    "https://en.wikipedia.org/wiki/Word2vec": {
        "excerpt": "Word2vec is a technique in natural language processing (NLP) for obtaining vector representations of words. These vectors capture information about the meaning of the word based on the surrounding words. The word2vec algorithm estimates these representations by modeling text in a large corpus. Once trained, such a model can detect synonymous words or suggest additional words for a partial sentence. Word2vec was developed by Tomáš Mikolov, Kai Chen, Greg Corrado, Ilya Sutskever and Jeff Dean at Google, and published in 2013.[1][2]",
        "title": "Word2vec",
        "author": "Contributors to Wikimedia projects",
        "date": "2015-08-14T22:22:48Z",
        "website": "Wikimedia Foundation, Inc.",
        "auto": true
    },
    "https://github.com/dpc/tagwiki": {
        "excerpt": "A wiki in which you link to pages by specifing hashtags they contain. - dpc/tagwiki",
        "title": "GitHub - dpc/tagwiki: A wiki in which you link to pages by specifing hashtags they contain.",
        "author": "dpc",
        "date": null,
        "website": "GitHub",
        "auto": true
    },
    "https://dynalist.io/": {
        "excerpt": "Dynalist lets you organize your ideas and tasks in simple lists. It's powerful, yet easy to use. Try the live demo now, no need to sign up.",
        "title": "Home - Dynalist",
        "author": null,
        "date": null,
        "website": null,
        "auto": true
    },
    "https://cameronharwick.com/writing/related-posts-in-wordpress-with-vector-embedding/": {
        "excerpt": "Since my 10 year old related posts plugin can’t even be downloaded anymore because of a security vulnerability, I figure it’s time to bring…",
        "title": "Related Posts in WordPress with Vector Embedding",
        "author": null,
        "date": null,
        "website": null,
        "auto": true
    }
}
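Each entry in this hunk maps a URL to scraped metadata with a fixed field set: excerpt, title, author, date, website, and an "auto" flag marking automatically scraped records. As a minimal sketch of how such a cache might be consumed — the field names come from the diff above, but the site's actual loading code is not shown here, so the file location, types, and function below are assumptions:

    // Hypothetical reader for the cache format visible in this diff.
    // The LinkMeta fields mirror the JSON entries above; everything else
    // (paths, names, fallback behaviour) is illustrative, not the site's code.
    import { readFileSync } from "node:fs";

    interface LinkMeta {
      excerpt: string | null;
      title: string | null;
      author: string | null;
      date: string | null;    // ISO 8601 timestamp, or null when the scraper found none
      website: string | null;
      auto: boolean;          // true = metadata was scraped automatically
    }

    type LinksCache = Record<string, LinkMeta>;

    const cache: LinksCache = JSON.parse(readFileSync("links_cache.json", "utf8"));

    // Look up preview metadata for a URL, falling back to the bare URL as a title.
    function linkPreview(url: string): { title: string; excerpt?: string } {
      const meta = cache[url];
      if (!meta) return { title: url };
      return { title: meta.title ?? url, excerpt: meta.excerpt ?? undefined };
    }

    console.log(linkPreview("https://en.wikipedia.org/wiki/Word2vec"));

Keying the cache by full URL keeps lookups trivial at render time and lets the scraper repopulate entries independently of the posts that cite them.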