1
0
mirror of https://github.com/osmarks/website synced 2024-12-23 08:30:44 +00:00

Fix partly broken FTS bigram generation

This commit is contained in:
osmarks 2024-09-07 09:42:11 +01:00
parent 4cb1cba03e
commit cb2f69da0d

View File

@@ -61,7 +61,7 @@ export const build = () => {
const [a, b] = bigram.split(BIGRAM_SEPARATOR, 2) const [a, b] = bigram.split(BIGRAM_SEPARATOR, 2)
// bigram provides no useful information if term is unique anyway // bigram provides no useful information if term is unique anyway
// want ascending order (lower is better) // want ascending order (lower is better)
if (totalTermCounts[a] === 1 || !totalTermCounts[b] === 1) { return 0 } if (totalTermCounts[a] === 1 || totalTermCounts[b] === 1) { return 0 }
return -(count / totalBigrams) / ((totalTermCounts[a] / totalTerms) * (totalTermCounts[b] / totalTerms)) return -(count / totalBigrams) / ((totalTermCounts[a] / totalTerms) * (totalTermCounts[b] / totalTerms))
} }
const pmis = new Map(Object.entries(totalBigramCounts).map(([k, v]) => [k, pmi(k, v)])) const pmis = new Map(Object.entries(totalBigramCounts).map(([k, v]) => [k, pmi(k, v)]))
@@ -96,4 +96,4 @@ export const build = () => {
console.log(`Total terms: ${totalTerms}`) console.log(`Total terms: ${totalTerms}`)
console.log(`Total bigrams: ${totalBigrams}`) console.log(`Total bigrams: ${totalBigrams}`)
return msgpack.pack(records) return msgpack.pack(records)
} }