mirror of
https://github.com/monkeytypegame/monkeytype.git
synced 2025-03-04 02:34:43 +08:00
fix(zipf): improve approximation of zipf distribution (@NadAlaba) (#5515)
* fix(zipf): improve approximation of zipf distribution * rename --------- Co-authored-by: Miodec <jack@monkeytype.com>
This commit is contained in:
parent
d12da37050
commit
211253becb
2 changed files with 15 additions and 9 deletions
|
@ -1,5 +1,5 @@
|
|||
import * as FunboxList from "./funbox/funbox-list";
|
||||
import { dreymarIndex } from "../utils/misc";
|
||||
import { zipfyRandomArrayIndex } from "../utils/misc";
|
||||
import { randomElementFromArray, shuffle } from "../utils/arrays";
|
||||
import Config from "../config";
|
||||
|
||||
|
@ -25,7 +25,7 @@ export class Wordset implements MonkeyTypes.Wordset {
|
|||
|
||||
randomWord(mode: MonkeyTypes.FunboxWordsFrequency): string {
|
||||
if (mode === "zipf") {
|
||||
return this.words[dreymarIndex(this.words.length)] as string;
|
||||
return this.words[zipfyRandomArrayIndex(this.words.length)] as string;
|
||||
} else {
|
||||
return randomElementFromArray(this.words);
|
||||
}
|
||||
|
|
|
@ -579,14 +579,20 @@ export function isDevEnvironment(): boolean {
|
|||
return envConfig.isDevelopment;
|
||||
}
|
||||
|
||||
export function dreymarIndex(arrayLength: number): number {
|
||||
const n = arrayLength;
|
||||
const g = 0.5772156649;
|
||||
const M = Math.log(n) + g;
|
||||
export function zipfyRandomArrayIndex(dictLength: number): number {
|
||||
/**
|
||||
* get random index based on probability distribution of Zipf's law,
|
||||
* where PMF is (1/n)/H_N,
|
||||
* where H_N is the Harmonic number of (N), where N is dictLength
|
||||
* and the harmonic number is approximated using the formula:
|
||||
* H_n = ln(n + 0.5) + gamma
|
||||
*/
|
||||
const gamma = 0.5772156649015329; // Euler–Mascheroni constant
|
||||
const H_N = Math.log(dictLength + 0.5) + gamma; // approximation of H_N
|
||||
const r = Math.random();
|
||||
const h = Math.exp(r * M - g);
|
||||
const W = Math.ceil(h);
|
||||
return W - 1;
|
||||
/* inverse of CDF where CDF is H_n/H_N */
|
||||
const inverseCDF = Math.exp(r * H_N - gamma) - 0.5;
|
||||
return Math.floor(inverseCDF);
|
||||
}
|
||||
|
||||
export async function checkIfLanguageSupportsZipf(
|
||||
|
|
Loading…
Reference in a new issue