1
0
mirror of https://github.com/osmarks/mycorrhiza.git synced 2025-04-20 09:43:16 +00:00

full text search, better metadata

This commit is contained in:
osmarks 2025-03-07 10:26:46 +00:00
parent 703e56339d
commit 132c1a0572
15 changed files with 195 additions and 181 deletions

View File

@ -1,150 +0,0 @@
package auth
import (
	"fmt"
	"html"
	"io"
	"log"
	"mime"
	"net/http"

	"github.com/bouncepaw/mycorrhiza/viewutil"
	"github.com/gorilla/mux"

	"github.com/bouncepaw/mycorrhiza/cfg"
	"github.com/bouncepaw/mycorrhiza/l18n"
	"github.com/bouncepaw/mycorrhiza/user"
	"github.com/bouncepaw/mycorrhiza/util"
)
// InitAuth registers the authentication-related routes on r.
//
// /user-list and /lock are always registered. /login and /logout are only
// registered when cfg.UseAuth is set, and /register (GET and POST only)
// additionally requires cfg.AllowRegistration.
func InitAuth(r *mux.Router) {
	r.HandleFunc("/user-list", handlerUserList)
	r.HandleFunc("/lock", handlerLock)

	// Gating everything else on cfg.UseAuth here means the handlers below
	// never have to re-check it themselves.
	if cfg.UseAuth {
		if cfg.AllowRegistration {
			r.HandleFunc("/register", handlerRegister).Methods(http.MethodPost, http.MethodGet)
		}
		r.HandleFunc("/login", handlerLogin)
		r.HandleFunc("/logout", handlerLogout)
	}
}
// handlerUserList responds with status 200 and the user list page wrapped in
// the base layout, titled with the localized "ui.users_title" string.
func handlerUserList(w http.ResponseWriter, rq *http.Request) {
	locale := l18n.FromRequest(rq)

	w.Header().Set("Content-Type", mime.TypeByExtension(".html"))
	w.WriteHeader(http.StatusOK)

	page := viewutil.Base(
		viewutil.MetaFrom(w, rq),
		locale.Get("ui.users_title"),
		UserList(locale),
		map[string]string{},
	)
	w.Write([]byte(page))
}
func handlerLock(w http.ResponseWriter, rq *http.Request) {
_, _ = io.WriteString(w, Lock(l18n.FromRequest(rq)))
}
// handlerRegister displays the register form (GET) or registers the user (POST).
//
// On POST the "username" and "password" form values are passed to
// user.Register with the "editor" group and the "local" source. If that fails,
// a 400 page with the error text and a "try again" link is rendered. On
// success the new user is logged in and redirected to "/" followed by the raw
// query string of the request.
func handlerRegister(w http.ResponseWriter, rq *http.Request) {
	lc := l18n.FromRequest(rq)
	util.PrepareRq(rq)
	if rq.Method == http.MethodGet {
		_, _ = io.WriteString(
			w,
			viewutil.Base(
				viewutil.MetaFrom(w, rq),
				lc.Get("auth.register_title"),
				Register(rq),
				map[string]string{},
			),
		)
		return
	}

	var (
		username = rq.PostFormValue("username")
		password = rq.PostFormValue("password")
		err      = user.Register(username, password, "editor", "local", false)
	)
	if err != nil {
		log.Printf("Failed to register %s: %s", username, err.Error())
		w.Header().Set("Content-Type", mime.TypeByExtension(".html"))
		w.WriteHeader(http.StatusBadRequest)
		_, _ = io.WriteString(
			w,
			viewutil.Base(
				viewutil.MetaFrom(w, rq),
				lc.Get("auth.register_title"),
				fmt.Sprintf(
					`<main class="main-width"><p>%s</p><p><a href="/register">%s</a></p></main>`,
					// The error text may echo user-supplied input, so it must
					// not reach the page as raw HTML.
					html.EscapeString(err.Error()),
					lc.Get("auth.try_again"),
				),
				map[string]string{},
			),
		)
		return
	}

	log.Printf("Successfully registered %s", username)
	if err := user.LoginDataHTTP(w, username, password); err != nil {
		// The account was created but the session was not. Record it instead
		// of dropping the error silently.
		// NOTE(review): the response is left exactly as LoginDataHTTP left it;
		// confirm whether that function writes an error status itself.
		log.Printf("Failed to log in %s after registration: %s", username, err.Error())
		return
	}
	http.Redirect(w, rq, "/"+rq.URL.RawQuery, http.StatusSeeOther)
}
// handlerLogout shows the logout form (GET) or logs the user out (POST).
//
// For GET the page is served with 200 when user.FromRequest yields a user and
// with 403 when it yields nil. For POST the user is logged out and redirected
// to "/". Any other method produces no explicit response.
func handlerLogout(w http.ResponseWriter, rq *http.Request) {
	switch rq.Method {
	case http.MethodGet:
		currentUser := user.FromRequest(rq)
		loggedIn := currentUser != nil
		locale := l18n.FromRequest(rq)

		w.Header().Set("Content-Type", "text/html;charset=utf-8")
		status := http.StatusForbidden
		if loggedIn {
			log.Println("User", currentUser.Name, "tries to log out")
			status = http.StatusOK
		} else {
			log.Println("Unknown user tries to log out")
		}
		w.WriteHeader(status)

		page := viewutil.Base(
			viewutil.MetaFrom(w, rq),
			locale.Get("auth.logout_title"),
			Logout(loggedIn, locale),
			map[string]string{},
		)
		_, _ = io.WriteString(w, page)

	case http.MethodPost:
		user.LogoutFromRequest(w, rq)
		http.Redirect(w, rq, "/", http.StatusSeeOther)
	}
}
// handlerLogin shows the login form (GET) or logs the user in (POST).
//
// On POST the canonicalized "username" and the "password" form values are
// handed to user.LoginDataHTTP. A failure renders the login error page with
// status 500; success redirects to "/". Other methods produce no explicit
// response.
func handlerLogin(w http.ResponseWriter, rq *http.Request) {
	locale := l18n.FromRequest(rq)

	switch rq.Method {
	case http.MethodGet:
		w.Header().Set("Content-Type", "text/html;charset=utf-8")
		w.WriteHeader(http.StatusOK)
		page := viewutil.Base(
			viewutil.MetaFrom(w, rq),
			locale.Get("auth.login_title"),
			Login(locale),
			map[string]string{},
		)
		_, _ = io.WriteString(w, page)

	case http.MethodPost:
		username := util.CanonicalName(rq.PostFormValue("username"))
		password := rq.PostFormValue("password")

		if err := user.LoginDataHTTP(w, username, password); err != nil {
			w.Header().Set("Content-Type", "text/html;charset=utf-8")
			w.WriteHeader(http.StatusInternalServerError)
			meta := viewutil.MetaFrom(w, rq)
			// The error text doubles as the page title and the message body.
			message := err.Error()
			_, _ = io.WriteString(w, viewutil.Base(meta, message, LoginError(message, locale), map[string]string{}))
			return
		}
		http.Redirect(w, rq, "/", http.StatusSeeOther)
	}
}

2
go.mod
View File

@ -7,6 +7,7 @@ require (
github.com/go-ini/ini v1.67.0
github.com/gorilla/feeds v1.2.0
github.com/gorilla/mux v1.8.1
github.com/rivo/uniseg v0.4.7
github.com/valyala/quicktemplate v1.7.0
golang.org/x/crypto v0.31.0
golang.org/x/term v0.27.0
@ -14,6 +15,7 @@ require (
)
require (
github.com/dchest/stemmer v0.0.0-20161207102402-66719a20c4b5 // indirect
github.com/stretchr/testify v1.7.0 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
golang.org/x/sys v0.28.0 // indirect

4
go.sum
View File

@ -4,6 +4,8 @@ github.com/andybalholm/brotli v1.0.2/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu
github.com/andybalholm/brotli v1.0.3/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dchest/stemmer v0.0.0-20161207102402-66719a20c4b5 h1:Y8zPZQaUm5jRBMBbvSoPbQa8HCCORmJ6tkkyvvgNucM=
github.com/dchest/stemmer v0.0.0-20161207102402-66719a20c4b5/go.mod h1:19PoDJeUsXOb2qtHJB7Az1NI0hlRe5wQM77Vo7rbUY8=
github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A=
github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
@ -19,6 +21,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=

View File

@ -2,12 +2,18 @@
package backlinks
import (
"github.com/bouncepaw/mycorrhiza/internal/hyphae"
"log/slog"
"os"
"sort"
"time"
"strings"
"unicode"
"github.com/bouncepaw/mycorrhiza/util"
"github.com/bouncepaw/mycorrhiza/internal/hyphae"
"github.com/bouncepaw/mycorrhiza/history"
"github.com/rivo/uniseg"
"github.com/dchest/stemmer/porter2"
)
// yieldHyphaBacklinks gets backlinks for the desired hypha, sorts and yields them one by one.
@ -38,22 +44,131 @@ func RunBacklinksConveyor() {
}
}
// ZeroTime is the zero value of time.Time.
var ZeroTime time.Time
// Metadata is the per-hypha information kept in forwardIndex.
type Metadata struct {
// Outlinks holds the hypha links extracted from the hypha's content.
Outlinks []string
// Bytes is the length of the content in bytes.
Bytes int
// Words is the number of tokens tokenize produced for the content.
Words int
// Updated is set by updateRevTimestamp; it stays the zero time until then.
Updated time.Time
}
// backlinkIndex maps a link target to the set of hyphae that link to it.
var backlinkIndex = make(map[string]linkSet)
// forwardIndex maps a canonical hypha name to its Metadata.
var forwardIndex = make(map[string]Metadata)
// invertedIndex maps a token to the hyphae containing it, and for each hypha
// to the number of times the token was written for it.
var invertedIndex = make(map[string]map[string]int)
// scrubInvertedIndexEntry removes hyphaName from the inverted index entry of
// every token in tokens. Tokens that are not indexed are ignored, and
// duplicate tokens are harmless.
//
// The hypha is removed from a token's entry entirely, regardless of the count
// stored for it. When that leaves the token with no hyphae at all, the token
// itself is dropped so that invertedIndex does not accumulate empty maps for
// words that no longer occur anywhere.
func scrubInvertedIndexEntry(hyphaName string, tokens []string) {
	for _, token := range tokens {
		postings, ok := invertedIndex[token]
		if !ok {
			continue
		}
		delete(postings, hyphaName)
		if len(postings) == 0 {
			delete(invertedIndex, token)
		}
	}
}
// writeTokensToInvertedIndex adds one occurrence of hyphaName to the inverted
// index entry of each token in tokens, creating entries on demand. A token
// listed several times is counted several times.
func writeTokensToInvertedIndex(hyphaName string, tokens []string) {
	for _, token := range tokens {
		postings, ok := invertedIndex[token]
		if !ok {
			postings = make(map[string]int)
			invertedIndex[token] = postings
		}
		postings[hyphaName]++
	}
}
// containsAlnum reports whether s has at least one rune that Unicode
// classifies as a letter or a number.
func containsAlnum(s string) bool {
	isAlnum := func(r rune) bool {
		return unicode.IsLetter(r) || unicode.IsNumber(r)
	}
	return strings.IndexFunc(s, isAlnum) >= 0
}
// tokenize splits s into words with uniseg's word segmentation, keeps only the
// words that contain a letter or a number, and returns them lowercased and
// stemmed with the porter2 stemmer. The result is never nil.
func tokenize(s string) []string {
	stemmer := porter2.Stemmer
	result := []string{}

	var word string
	// -1 is the initial segmentation state; each call returns the state to
	// pass along with the rest of the string.
	for rest, state := s, -1; len(rest) > 0; {
		word, rest, state = uniseg.FirstWordInString(rest, state)
		if !containsAlnum(word) {
			continue
		}
		result = append(result, stemmer.Stem(strings.ToLower(word)))
	}
	return result
}
// generateMetadata computes fresh metadata for h from content and stores it in
// forwardIndex under the hypha's canonical name, replacing any earlier entry.
// The Updated field of the new entry is left at its zero value.
func generateMetadata(h hyphae.Hypha, content string) {
	wordCount := len(tokenize(content))
	name := h.CanonicalName()
	forwardIndex[name] = Metadata{
		Outlinks: extractHyphaLinksFromContent(name, content),
		Bytes:    len(content),
		Words:    wordCount,
	}
}
// updateRevTimestamp sets the Updated time of the metadata stored for h to
// newTime. It does nothing when forwardIndex has no entry for the hypha.
func updateRevTimestamp(h hyphae.Hypha, newTime time.Time) {
	name := h.CanonicalName()
	meta, ok := forwardIndex[name]
	if !ok {
		return
	}
	// Struct values stored in a map cannot be modified in place, so the entry
	// is copied out, changed and written back.
	meta.Updated = newTime
	forwardIndex[name] = meta
}
// IndexBacklinks walks the hyphae yielded by the hyphae package and forms the
// initial indexes: it extracts links from each hypha's text into the backlink
// index, generates the hypha's metadata, and writes the tokens of its name and
// content to the inverted index. Call it when indexing and reindexing hyphae.
func IndexBacklinks() {
// It is safe to ignore the mutex, because there is only one worker.
for h := range hyphae.FilterHyphaeWithText(hyphae.YieldExistingHyphae()) {
foundLinks := extractHyphaLinksFromContent(h.CanonicalName(), fetchText(h))
for h := range hyphae.YieldExistingHyphae() {
content := fetchText(h)
foundLinks := extractHyphaLinksFromContent(h.CanonicalName(), content)
for _, link := range foundLinks {
if _, exists := backlinkIndex[link]; !exists {
backlinkIndex[link] = make(linkSet)
}
backlinkIndex[link][h.CanonicalName()] = struct{}{}
}
generateMetadata(h, content)
// If the revisions cannot be read or there are none, Updated keeps its zero value.
if revs, err := history.Revisions(h.CanonicalName()); err == nil {
// sorted newest first
if len(revs) > 0 {
updateRevTimestamp(h, revs[0].Time)
}
}
// Both the beautified name and the content are searchable.
writeTokensToInvertedIndex(h.CanonicalName(), tokenize(util.BeautifulName(h.CanonicalName())))
writeTokensToInvertedIndex(h.CanonicalName(), tokenize(content))
}
}
// Search tokenizes query and returns the names of all hyphae indexed under at
// least one of the resulting tokens, sorted alphabetically. The result is
// never nil.
//
// Term frequencies are summed per hypha but do not influence the order yet.
func Search(query string) []string {
	scores := make(map[string]int)
	for _, token := range tokenize(query) {
		// Ranging over the nil map of an unknown token is a no-op.
		for name, termFrequency := range invertedIndex[token] {
			scores[name] += termFrequency
		}
	}

	// TODO: actually use the tf
	names := make([]string, 0, len(scores))
	for name := range scores {
		names = append(names, name)
	}
	sort.Strings(names)
	return names
}
// BacklinksCount returns the amount of backlinks to the hypha. Pass canonical names.
func BacklinksCount(hyphaName string) int {
if links, exists := backlinkIndex[hyphaName]; exists {
@ -70,6 +185,10 @@ func BacklinksFor(hyphaName string) []string {
return backlinks
}
// MetadataFor returns the metadata stored in forwardIndex for hyphaName, or a
// zero Metadata when there is no entry for that name.
func MetadataFor(hyphaName string) Metadata {
	if meta, ok := forwardIndex[hyphaName]; ok {
		return meta
	}
	return Metadata{}
}
func Orphans() []string {
var orphans []string
for h := range hyphae.YieldExistingHyphae() {
@ -124,6 +243,8 @@ type backlinkIndexEdit struct {
name string
oldLinks []string
newLinks []string
content string
oldContent string
}
// apply changes backlink index respective to the operation data
@ -143,12 +264,20 @@ func (op backlinkIndexEdit) apply() {
backlinkIndex[link][op.name] = struct{}{}
}
}
hyp := hyphae.ByName(op.name)
generateMetadata(hyp, op.content)
// wrong, but close enough
updateRevTimestamp(hyp, time.Now())
scrubInvertedIndexEntry(op.name, tokenize(op.oldContent))
writeTokensToInvertedIndex(op.name, tokenize(op.content))
}
// backlinkIndexDeletion contains data for backlink index update after a hypha deletion
type backlinkIndexDeletion struct {
name string
links []string
name string
links []string
// content is the text of the deleted hypha; apply tokenizes it to scrub the
// hypha from the inverted index.
content string
}
// apply changes backlink index respective to the operation data
@ -158,6 +287,10 @@ func (op backlinkIndexDeletion) apply() {
delete(lSet, op.name)
}
}
delete(forwardIndex, op.name)
scrubInvertedIndexEntry(op.name, tokenize(op.content))
scrubInvertedIndexEntry(op.name, tokenize(util.BeautifulName(op.name)))
}
// backlinkIndexRenaming contains data for backlink index update after a hypha renaming
@ -165,6 +298,7 @@ type backlinkIndexRenaming struct {
oldName string
newName string
links []string
content string
}
// apply changes backlink index respective to the operation data
@ -175,4 +309,9 @@ func (op backlinkIndexRenaming) apply() {
backlinkIndex[link][op.newName] = struct{}{}
}
}
scrubInvertedIndexEntry(op.oldName, tokenize(op.content))
scrubInvertedIndexEntry(op.oldName, tokenize(util.BeautifulName(op.oldName)))
writeTokensToInvertedIndex(op.newName, tokenize(op.content))
writeTokensToInvertedIndex(op.newName, tokenize(util.BeautifulName(op.newName)))
}

View File

@ -13,25 +13,22 @@ import (
// UpdateBacklinksAfterEdit is a creation/editing hook for backlinks index.
// It sends an edit operation to backlinkConveyor carrying the links extracted
// from oldText and from the hypha's current text, plus both texts themselves.
func UpdateBacklinksAfterEdit(h hyphae.Hypha, oldText string) {
oldLinks := extractHyphaLinksFromContent(h.CanonicalName(), oldText)
newLinks := extractHyphaLinks(h)
backlinkConveyor <- backlinkIndexEdit{h.CanonicalName(), oldLinks, newLinks}
// The current text is fetched once and reused for link extraction and for
// the operation's content field.
contents := fetchText(h)
newLinks := extractHyphaLinksFromContent(h.CanonicalName(), contents)
backlinkConveyor <- backlinkIndexEdit{h.CanonicalName(), oldLinks, newLinks, contents, oldText}
}
// UpdateBacklinksAfterDelete is a deletion hook for backlinks index.
// It sends a deletion operation to backlinkConveyor with the links extracted
// from oldText and oldText itself.
func UpdateBacklinksAfterDelete(h hyphae.Hypha, oldText string) {
oldLinks := extractHyphaLinksFromContent(h.CanonicalName(), oldText)
backlinkConveyor <- backlinkIndexDeletion{h.CanonicalName(), oldLinks}
backlinkConveyor <- backlinkIndexDeletion{h.CanonicalName(), oldLinks, oldText}
}
// UpdateBacklinksAfterRename is a renaming hook for backlinks index
func UpdateBacklinksAfterRename(h hyphae.Hypha, oldName string) {
actualLinks := extractHyphaLinks(h)
backlinkConveyor <- backlinkIndexRenaming{oldName, h.CanonicalName(), actualLinks}
}
// extractHyphaLinks extracts hypha links from a desired hypha
func extractHyphaLinks(h hyphae.Hypha) []string {
return extractHyphaLinksFromContent(h.CanonicalName(), fetchText(h))
contents := fetchText(h)
actualLinks := extractHyphaLinksFromContent(h.CanonicalName(), contents)
backlinkConveyor <- backlinkIndexRenaming{oldName, h.CanonicalName(), actualLinks, contents}
}
// extractHyphaLinksFromContent extracts local hypha links from the provided text.

View File

@ -48,6 +48,8 @@ var (
ReplaceTo []string
Motds []string
OverrideLogin string
)
// WikiDir is a full path to the wiki storage directory, which also must be a
@ -67,6 +69,7 @@ type Config struct {
ReplaceFrom []string
ReplaceTo []string
Motds []string
OverrideLogin string
}
// Hyphae is a section of Config which has fields related to special hyphae.
@ -202,6 +205,7 @@ func ReadConfigFile(path string) error {
ReplaceFrom = cfg.ReplaceFrom
ReplaceTo = cfg.ReplaceTo
Motds = cfg.Motds
OverrideLogin = cfg.OverrideLogin
// This URL makes much more sense. If no URL is set or the protocol is forgotten, assume HTTP.
if URL == "" {

View File

@ -20,6 +20,9 @@ func CanProceed(rq *http.Request, route string) bool {
// FromRequest returns user from `rq`. If there is no user, an anon user is returned instead.
func FromRequest(rq *http.Request) *User {
if cfg.OverrideLogin != "" {
return ByName(cfg.OverrideLogin)
}
username, ok := rq.Header["X-Webauth-User"]
if !ok || len(username) < 1 {
return EmptyUser()

View File

@ -50,8 +50,6 @@ func main() {
// TODO: keep all crashes in main rather than somewhere there
viewutil.Init()
hyphae.Index(files.HyphaeDir())
backlinks.IndexBacklinks()
go backlinks.RunBacklinksConveyor()
user.InitUserDatabase()
if err := history.Start(); err != nil {
os.Exit(1)
@ -66,6 +64,8 @@ func main() {
if err := interwiki.Init(); err != nil {
os.Exit(1)
}
backlinks.IndexBacklinks()
go backlinks.RunBacklinksConveyor()
// Static files:
static.InitFS(files.StaticFiles())

View File

@ -57,11 +57,12 @@ func handlerList(w http.ResponseWriter, rq *http.Request) {
}
close(hyphaNames)
for hyphaName := range sortedHypha {
metadata := backlinks.MetadataFor(hyphaName)
switch h := hyphae.ByName(hyphaName).(type) {
case *hyphae.TextualHypha:
entries = append(entries, listDatum{h.CanonicalName(), ""})
entries = append(entries, listDatum{h.CanonicalName(), "", metadata})
case *hyphae.MediaHypha:
entries = append(entries, listDatum{h.CanonicalName(), filepath.Ext(h.MediaFilePath())[1:]})
entries = append(entries, listDatum{h.CanonicalName(), filepath.Ext(h.MediaFilePath())[1:], metadata})
}
}
viewList(viewutil.MetaFrom(w, rq), entries)
@ -176,10 +177,16 @@ func handlerTitleSearch(w http.ResponseWriter, rq *http.Request) {
query = rq.FormValue("q")
hyphaName = util.CanonicalName(query)
_, nameFree = hyphae.AreFreeNames(hyphaName)
results []string
results []listDatum
)
for hyphaName := range shroom.YieldHyphaNamesContainingString(query) {
results = append(results, hyphaName)
for _, hyphaName := range backlinks.Search(query) {
metadata := backlinks.MetadataFor(hyphaName)
switch h := hyphae.ByName(hyphaName).(type) {
case *hyphae.TextualHypha:
results = append(results, listDatum{h.CanonicalName(), "", metadata})
case *hyphae.MediaHypha:
results = append(results, listDatum{h.CanonicalName(), filepath.Ext(h.MediaFilePath())[1:], metadata})
}
}
w.WriteHeader(http.StatusOK)
viewTitleSearch(viewutil.MetaFrom(w, rq), query, hyphaName, !nameFree, results)

View File

@ -9,8 +9,10 @@
<li>
<a href="/hypha/{{.Name}}">{{beautifulName .Name}}</a>
{{if .Ext}}<span class="media-type-badge">{{.Ext}}</span>{{end}}
{{if .Metadata.Words}}<span class="media-type-badge">{{.Metadata.Words}} words</span>{{end}}
{{if not .Metadata.Updated.IsZero}}<span class="media-type-badge">{{.Metadata.Updated.Format "2006-01-02 15:04"}}</span>{{end}}
</li>
{{end}}
</ol>
</main>
{{end}}
{{end}}

View File

@ -10,7 +10,10 @@
<ol>
{{range .Results}}
<li>
<a class="wikilink" href="/hypha/{{.}}">{{beautifulName .}}</a>
<a href="/hypha/{{.Name}}">{{beautifulName .Name}}</a>
{{if .Ext}}<span class="media-type-badge">{{.Ext}}</span>{{end}}
{{if .Metadata.Words}}<span class="media-type-badge">{{.Metadata.Words}} words</span>{{end}}
{{if not .Metadata.Updated.IsZero}}<span class="media-type-badge">{{.Metadata.Updated.Format "2006-01-02 15:04"}}</span>{{end}}
</li>
{{end}}
</ol>
@ -18,4 +21,4 @@
<p>{{block "search no results" .}}No results{{end}}</p>
{{end}}
</main>
{{end}}
{{end}}

View File

@ -4,6 +4,7 @@ import (
"embed"
"github.com/bouncepaw/mycorrhiza/internal/hyphae"
"github.com/bouncepaw/mycorrhiza/internal/backlinks"
"github.com/bouncepaw/mycorrhiza/web/viewutil"
)
@ -27,8 +28,9 @@ func initViews() {
}
// listDatum is one entry of a hypha listing as passed to the list and search
// views.
type listDatum struct {
Name string
Ext string
// Name is the hypha name the entry links to.
Name string
// Ext is shown as a badge when non-empty.
Ext string
// Metadata supplies the word count and update time shown next to the entry.
Metadata backlinks.Metadata
}
type listData struct {
@ -48,12 +50,12 @@ func viewList(meta viewutil.Meta, entries []listDatum) {
// titleSearchData is the data passed to the search results page.
type titleSearchData struct {
*viewutil.BaseData
// Query is the search query this page was built for.
Query string
Results []string
// Results are the entries listed on the page.
Results []listDatum
MatchedHyphaName string
HasExactMatch bool
}
func viewTitleSearch(meta viewutil.Meta, query string, hyphaName string, hasExactMatch bool, results []string) {
func viewTitleSearch(meta viewutil.Meta, query string, hyphaName string, hasExactMatch bool, results []listDatum) {
viewutil.ExecutePage(meta, chainTitleSearch, titleSearchData{
BaseData: &viewutil.BaseData{},
Query: query,

View File

@ -30,7 +30,7 @@
<li class="top-bar__section top-bar__section_search">
<form class="top-bar__search" method="GET" action="/title-search">
<input type="text" name="q" class="top-bar__search-bar"
placeholder="{{block `search by title` .}}Search by title{{end}}">
placeholder="{{block `search by title` .}}Search{{end}}">
</form>
</li>
<li class="top-bar__section top-bar__section_auth">

View File

@ -44,6 +44,7 @@
ol:not(.mycomarkup-doc ol) a { text-decoration: none; display: inline-block; padding: .25rem; }
ol:not(.mycomarkup-doc ol) a:hover { text-decoration: underline; }
.media-type-badge { font-size: smaller; color: #999; }
.media-type-badge:not(:first-of-type)::before { content: " • "; }
/* General element positions, from small to big */
/* Phones and whatnot */

View File

@ -30,7 +30,7 @@
<li class="top-bar__section top-bar__section_search">
<form class="top-bar__search" method="GET" action="/title-search">
<input type="text" name="q" class="top-bar__search-bar"
placeholder="{{block `search by title` .}}Search by title{{end}}">
placeholder="{{block `search by title` .}}Search{{end}}">
</form>
</li>
<li class="top-bar__section top-bar__section_auth">