1
0
mirror of https://github.com/osmarks/website synced 2024-12-23 08:30:44 +00:00

Fix partly broken FTS bigram generation

This commit is contained in:
osmarks 2024-09-07 09:42:11 +01:00
parent 4cb1cba03e
commit cb2f69da0d

View File

@ -61,7 +61,7 @@ export const build = () => {
const [a, b] = bigram.split(BIGRAM_SEPARATOR, 2)
// a bigram provides no useful information if either of its terms occurs only once anyway
// the score is negated because we want ascending order (lower is better)
if (totalTermCounts[a] === 1 || !totalTermCounts[b] === 1) { return 0 }
if (totalTermCounts[a] === 1 || totalTermCounts[b] === 1) { return 0 }
return -(count / totalBigrams) / ((totalTermCounts[a] / totalTerms) * (totalTermCounts[b] / totalTerms))
}
const pmis = new Map(Object.entries(totalBigramCounts).map(([k, v]) => [k, pmi(k, v)]))
@ -96,4 +96,4 @@ export const build = () => {
console.log(`Total terms: ${totalTerms}`)
console.log(`Total bigrams: ${totalBigrams}`)
return msgpack.pack(records)
}
}