diff --git a/go.mod b/go.mod index 486b813..a089e10 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/go-ini/ini v1.67.0 github.com/gorilla/feeds v1.2.0 github.com/gorilla/mux v1.8.1 + github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/rivo/uniseg v0.4.7 golang.org/x/crypto v0.31.0 golang.org/x/term v0.27.0 diff --git a/go.sum b/go.sum index 67a8856..6f718c2 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,8 @@ github.com/gorilla/feeds v1.2.0 h1:O6pBiXJ5JHhPvqy53NsjKOThq+dNFm8+DFrxBEdzSCc= github.com/gorilla/feeds v1.2.0/go.mod h1:WMib8uJP3BbY+X8Szd1rA5Pzhdfh+HCCAYT2z7Fza6Y= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= diff --git a/internal/cfg/config.go b/internal/cfg/config.go index 0f06069..5854eb2 100644 --- a/internal/cfg/config.go +++ b/internal/cfg/config.go @@ -50,6 +50,8 @@ var ( Motds []string OverrideLogin string + + UserModelBackendURL string ) // WikiDir is a full path to the wiki storage directory, which also must be a @@ -59,17 +61,18 @@ var WikiDir string // Config represents a Mycorrhiza wiki configuration file. This type is used // only when reading configs. type Config struct { - WikiName string `comment:"This name appears in the header and on various pages."` - NaviTitleIcon string `comment:"This icon is used in the breadcrumbs bar."` + WikiName string `comment:"This name appears in the header and on various pages."` + NaviTitleIcon string `comment:"This icon is used in the breadcrumbs bar."` Hyphae Network Authorization - CustomScripts `comment:"You can specify additional scripts to load on different kinds of pages, delimited by a comma ',' sign."` - Telegram `comment:"You can enable Telegram authorization. Follow these instructions: https://core.telegram.org/widgets/login#setting-up-a-bot"` - ReplaceFrom []string - ReplaceTo []string - Motds []string - OverrideLogin string + CustomScripts `comment:"You can specify additional scripts to load on different kinds of pages, delimited by a comma ',' sign."` + Telegram `comment:"You can enable Telegram authorization. Follow these instructions: https://core.telegram.org/widgets/login#setting-up-a-bot"` + ReplaceFrom []string + ReplaceTo []string + Motds []string + OverrideLogin string + UserModelBackendURL string } // Hyphae is a section of Config which has fields related to special hyphae. @@ -206,6 +209,7 @@ func ReadConfigFile(path string) error { ReplaceTo = cfg.ReplaceTo Motds = cfg.Motds OverrideLogin = cfg.OverrideLogin + UserModelBackendURL = cfg.UserModelBackendURL // This URL makes much more sense. If no URL is set or the protocol is forgotten, assume HTTP. if URL == "" { diff --git a/internal/hyphae/iterators.go b/internal/hyphae/iterators.go index 022b8f1..3ba357a 100644 --- a/internal/hyphae/iterators.go +++ b/internal/hyphae/iterators.go @@ -26,6 +26,14 @@ func YieldExistingHyphae() chan ExistingHypha { return ch } +func GetAll() []ExistingHypha { + out := []ExistingHypha{} + for _, h := range byNames { + out = append(out, h) + } + return out +} + // FilterHyphaeWithText filters the source channel and yields only those hyphae than have text parts. func FilterHyphaeWithText(src chan ExistingHypha) chan ExistingHypha { // TODO: reimplement as a function with a callback? diff --git a/misc/handlers.go b/misc/handlers.go index ab8c9ff..aecd237 100644 --- a/misc/handlers.go +++ b/misc/handlers.go @@ -2,12 +2,15 @@ package misc import ( + "bytes" + "encoding/json" "io" "log/slog" "math/rand" "mime" "net/http" "path/filepath" + "time" "github.com/gorilla/mux" @@ -98,9 +101,49 @@ func handlerUpdateHeaderLinks(w http.ResponseWriter, rq *http.Request) { http.Redirect(w, rq, "/", http.StatusSeeOther) } +var hclient http.Client = http.Client{ + Timeout: 1 * time.Second, +} + +type ModelReq struct { + Trace []string `json:"trace"` + AllNames []string `json:"all_names"` +} + +func accessModelBackend(trace []string, allNames []string) (*string, error) { + jsonData, err := json.Marshal(ModelReq { + Trace: trace, + AllNames: allNames, + }) + if err != nil { + return nil, err + } + + r, err := hclient.Post(cfg.UserModelBackendURL, "application/json", bytes.NewBuffer(jsonData)) + if err != nil { + return nil, err + } + + reqBody, err := io.ReadAll(r.Body) + if err != nil { + return nil, err + } + + var output string + err = json.Unmarshal(reqBody, &output) + if err != nil { + return nil, err + } + + return &output, nil +} + // handlerRandom redirects to a random hypha. func handlerRandom(w http.ResponseWriter, rq *http.Request) { util.PrepareRq(rq) + + trace := util.ReadTrace(rq) + var ( randomHyphaName string amountOfHyphae = hyphae.Count() @@ -110,13 +153,28 @@ func handlerRandom(w http.ResponseWriter, rq *http.Request) { viewutil.HttpErr(viewutil.MetaFrom(w, rq), http.StatusNotFound, cfg.HomeHypha, lc.Get("ui.random_no_hyphae_tip")) return } - i := rand.Intn(amountOfHyphae) - for h := range hyphae.YieldExistingHyphae() { - if i == 0 { - randomHyphaName = h.CanonicalName() - } - i-- + + hyphae := hyphae.GetAll() + + allNames := []string{} + + for _, h := range hyphae { + allNames = append(allNames, h.CanonicalName()) } + + var res *string + var err error + if cfg.UserModelBackendURL != "" { + res, err = accessModelBackend(trace, allNames) + } + + if err != nil || res == nil || (res != nil && *res == "") { + i := rand.Intn(amountOfHyphae) + randomHyphaName = hyphae[i].CanonicalName() + } else { + randomHyphaName = *res + } + http.Redirect(w, rq, "/hypha/"+randomHyphaName, http.StatusSeeOther) } diff --git a/util/util.go b/util/util.go index a70bc3c..675197b 100644 --- a/util/util.go +++ b/util/util.go @@ -3,10 +3,14 @@ package util import ( "crypto/rand" "encoding/hex" + "hash/fnv" "log/slog" "net/http" "strings" "time" + insecureRand "math/rand" + + "github.com/hashicorp/golang-lru/v2" "github.com/bouncepaw/mycorrhiza/internal/cfg" "github.com/bouncepaw/mycorrhiza/internal/files" @@ -164,3 +168,99 @@ func GetMotd() string { dayIndex := now / 86400 return cfg.Motds[dayIndex%int64(len(cfg.Motds))] } + +func RequestHeaderFingerprint(rq *http.Request) uint64 { + fprintHeaders := []string{"accept", "accept-encoding", "accept-language", "dnt", "host", "user-agent", "x-tls-fp"} + hasher := fnv.New64() + + for _, hdr := range fprintHeaders { + if value := rq.Header.Get(hdr); value != "" { + hasher.Write([]byte(hdr)) + hasher.Write([]byte(value)) + } + } + + return hasher.Sum64() +} + +var ipLookup *lru.Cache[string, uint64] +var fprintLookup *lru.Cache[uint64, uint64] + +func EstimateTrackingIdentifier(rq *http.Request) uint64 { + if ipLookup == nil || fprintLookup == nil { + var err error + ipLookup, err = lru.New[string, uint64](1<<20) + if err != nil { + slog.Error("cache create failed?", "error", err) + } + // golang... + fprintLookup, err = lru.New[uint64, uint64](1<<20) + if err != nil { + slog.Error("cache create failed?", "error", err) + } + } + + ip := strings.Split(rq.RemoteAddr, ":")[0] + if val := rq.Header.Get("x-forwarded-for"); val != "" { + ip = val + } + + fp := RequestHeaderFingerprint(rq) + + // Try to look up by IP address. If that fails, look up by fingerprint, and update the record for that IP address. + // If that also fails, assign a random identifier to both. + id := insecureRand.Uint64() + if cid, ok := ipLookup.Get(ip); ok { + id = cid + } else { + if cid, ok := fprintLookup.Get(fp); ok { + id = cid + ipLookup.Add(ip, cid) + } else { + ipLookup.Add(ip, id) + fprintLookup.Add(fp, id) + } + } + + return id +} + +var visitHistory *lru.Cache[uint64, []string] +const maxTraceLen int = 64 + +func EnsureVisitHistoryExists() { + if visitHistory == nil { + var err error + visitHistory, err = lru.New[uint64, []string](1<<18) + if err != nil { + slog.Error("cache create failed?", "error", err) + } + } +} + +func WriteTrace(rq *http.Request, hyphaName string) { + EnsureVisitHistoryExists() + + trk := EstimateTrackingIdentifier(rq) + trace := []string{} + if htrace, ok := visitHistory.Get(trk); ok { + trace = htrace + } + trace = append(trace, hyphaName) + if len(trace) > maxTraceLen { + trace = trace[1:] + } + visitHistory.Add(trk, trace) +} + +func ReadTrace(rq *http.Request) []string { + EnsureVisitHistoryExists() + + trk := EstimateTrackingIdentifier(rq) + trace := []string{} + if htrace, ok := visitHistory.Get(trk); ok { + trace = htrace + } + + return trace +} diff --git a/web/readers.go b/web/readers.go index e99ce59..ff3adc3 100644 --- a/web/readers.go +++ b/web/readers.go @@ -218,6 +218,7 @@ func handlerBinary(w http.ResponseWriter, rq *http.Request) { // handlerHypha is the main hypha action that displays the hypha and the binary upload form along with some navigation. func handlerHypha(w http.ResponseWriter, rq *http.Request) { util.PrepareRq(rq) + var ( hyphaName = util.HyphaNameFromRq(rq, "page", "hypha") h = hyphae.ByName(hyphaName) @@ -243,6 +244,9 @@ func handlerHypha(w http.ResponseWriter, rq *http.Request) { "IsMediaHypha": false, } ) + + util.WriteTrace(rq, hyphaName) + slog.Info("reading hypha", "name", h.CanonicalName(), "can edit", data["GivenPermissionToModify"]) meta.BodyAttributes = map[string]string{ "cats": category_list,