From b3ae78f41338fd634524364e8bfee29864750df0 Mon Sep 17 00:00:00 2001 From: Christian Fehmer Date: Wed, 6 Dec 2023 17:03:44 +0100 Subject: [PATCH] perf: improve leaderboard aggregation (fehmer) (#4841) * perf: improve leaderboard aggregation Update the leaderboard and public speedStats directly in mongodb without importing the data in the nodejs process first * Replace type on public collection with magic ids --- backend/__tests__/dal/public.spec.ts | 32 ------ backend/jest-mongodb-config.js | 2 +- backend/src/dal/leaderboards.ts | 160 +++++++++++++-------------- backend/src/dal/public.ts | 7 +- backend/src/types/types.d.ts | 11 +- 5 files changed, 85 insertions(+), 127 deletions(-) diff --git a/backend/__tests__/dal/public.spec.ts b/backend/__tests__/dal/public.spec.ts index 5773d6135..92b9aedf5 100644 --- a/backend/__tests__/dal/public.spec.ts +++ b/backend/__tests__/dal/public.spec.ts @@ -1,23 +1,4 @@ import * as PublicDAL from "../../src/dal/public"; -import * as db from "../../src/init/db"; -import { ObjectId } from "mongodb"; - -const mockSpeedHistogram = { - type: "speedStats", - english_time_15: { - "70": 2761, - "80": 2520, - "90": 2391, - "100": 2317, - }, - english_time_60: { - "50": 8781, - "60": 2978, - "70": 2786, - "80": 2572, - "90": 2399, - }, -}; describe("PublicDAL", function () { it("should be able to update stats", async function () { @@ -41,17 +22,4 @@ describe("PublicDAL", function () { expect(afterStats.testsStarted).toBe(priorStats.testsStarted + 2); expect(afterStats.timeTyping).toBe(priorStats.timeTyping + 60); }); - - it("should be able to get speed histogram", async function () { - // this test ensures that the property access is correct - await db - .collection("public") - .replaceOne({ type: "speedStats" }, mockSpeedHistogram, { upsert: true }); - const speedHistogram = await PublicDAL.getSpeedHistogram( - "english", - "time", - "60" - ); - expect(speedHistogram["50"]).toBe(8781); // check a value in the histogram that has been set - }); }); diff --git a/backend/jest-mongodb-config.js b/backend/jest-mongodb-config.js index 96083cb0b..395108d70 100644 --- a/backend/jest-mongodb-config.js +++ b/backend/jest-mongodb-config.js @@ -1,7 +1,7 @@ module.exports = { mongodbMemoryServerOptions: { binary: { - version: "4.0.3", + version: "6.0.12", skipMD5: true, }, instance: { diff --git a/backend/src/dal/leaderboards.ts b/backend/src/dal/leaderboards.ts index 8b4644930..4b77ce046 100644 --- a/backend/src/dal/leaderboards.ts +++ b/backend/src/dal/leaderboards.ts @@ -60,15 +60,16 @@ export async function getRank( export async function update( mode: string, mode2: string, - language: string, - uid?: string + language: string ): Promise<{ message: string; rank?: number; }> { const key = `lbPersonalBests.${mode}.${mode2}.${language}`; + const lbCollectionName = `leaderboards.${language}.${mode}.${mode2}`; + leaderboardUpdating[`${language}_${mode}_${mode2}`] = true; const start1 = performance.now(); - const lb = await db + const lb = db .collection("users") .aggregate( [ @@ -112,10 +113,6 @@ export async function update( [`${key}.raw`]: 1, [`${key}.consistency`]: 1, [`${key}.timestamp`]: 1, - banned: 1, - lbOptOut: 1, - needsToChangeName: 1, - timeTyping: 1, uid: 1, name: 1, discordId: 1, @@ -123,13 +120,27 @@ export async function update( inventory: 1, }, }, + { $addFields: { [`${key}.uid`]: "$uid", [`${key}.name`]: "$name", [`${key}.discordId`]: "$discordId", [`${key}.discordAvatar`]: "$discordAvatar", - [`${key}.badges`]: "$inventory.badges", + [`${key}.rank`]: { + $function: { + body: "function() {try {row_number+= 1;} catch (e) {row_number= 1;}return row_number;}", + args: [], + lang: "js", + }, + }, + [`${key}.badgeId`]: { + $function: { + body: "function(badges) {if (!badges) return null; for(let i=0;i badge.selected); - if (selectedBadge) { - lbEntry.badgeId = selectedBadge.id; - } - delete lbEntry.badges; - } - } + await db.collection(lbCollectionName).createIndex({ uid: -1 }); + await db.collection(lbCollectionName).createIndex({ rank: 1 }); + leaderboardUpdating[`${language}_${mode}_${mode2}`] = false; const end2 = performance.now(); + + //update speedStats const start3 = performance.now(); - leaderboardUpdating[`${language}_${mode}_${mode2}`] = true; - try { - await db.collection(`leaderboards.${language}.${mode}.${mode2}`).drop(); - } catch { - // - } - if (lb && lb.length !== 0) { - await db - .collection( - `leaderboards.${language}.${mode}.${mode2}` - ) - .insertMany(lb); - } + const boundaries = [...Array(32).keys()].map((it) => it * 10); + const statsKey = `${language}_${mode}_${mode2}`; + const src = await db.collection(lbCollectionName); + const histogram = src.aggregate( + [ + { + $bucket: { + groupBy: "$wpm", + boundaries: boundaries, + default: "Other", + }, + }, + { + $replaceRoot: { + newRoot: { + $arrayToObject: [[{ k: { $toString: "$_id" }, v: "$count" }]], + }, + }, + }, + { + $group: { + _id: "speedStatsHistogram", //we only expect one document with type=speedStats + [`${statsKey}`]: { + $mergeObjects: "$$ROOT", + }, + }, + }, + { + $merge: { + into: "public", + on: "_id", + whenMatched: "merge", + whenNotMatched: "insert", + }, + }, + ], + { allowDiskUse: true } + ); + await histogram.toArray(); const end3 = performance.now(); - const start4 = performance.now(); - await db.collection(`leaderboards.${language}.${mode}.${mode2}`).createIndex({ - uid: -1, - }); - await db.collection(`leaderboards.${language}.${mode}.${mode2}`).createIndex({ - rank: 1, - }); - leaderboardUpdating[`${language}_${mode}_${mode2}`] = false; - const end4 = performance.now(); - - const start5 = performance.now(); - const buckets = {}; // { "70": count, "80": count } - for (const lbEntry of lb) { - const bucket = Math.floor(lbEntry.wpm / 10).toString() + "0"; - if (bucket in buckets) buckets[bucket]++; - else buckets[bucket] = 1; - } - - await db - .collection("public") - .updateOne( - { type: "speedStats" }, - { $set: { [`${language}_${mode}_${mode2}`]: buckets } }, - { upsert: true } - ); - const end5 = performance.now(); - const timeToRunAggregate = (end1 - start1) / 1000; - const timeToRunLoop = (end2 - start2) / 1000; - const timeToRunInsert = (end3 - start3) / 1000; - const timeToRunIndex = (end4 - start4) / 1000; - const timeToSaveHistogram = (end5 - start5) / 1000; // not sent to prometheus yet + const timeToRunIndex = (end2 - start2) / 1000; + const timeToSaveHistogram = (end3 - start3) / 1000; // not sent to prometheus yet Logger.logToDb( `system_lb_update_${language}_${mode}_${mode2}`, - `Aggregate ${timeToRunAggregate}s, loop ${timeToRunLoop}s, insert ${timeToRunInsert}s, index ${timeToRunIndex}s, histogram ${timeToSaveHistogram}`, - uid + `Aggregate ${timeToRunAggregate}s, loop 0s, insert 0s, index ${timeToRunIndex}s, histogram ${timeToSaveHistogram}` ); setLeaderboard(language, mode, mode2, [ timeToRunAggregate, - timeToRunLoop, - timeToRunInsert, + 0, + 0, timeToRunIndex, ]); - if (retval) { - return { - message: "Successfully updated leaderboard", - rank: retval, - }; - } else { - return { - message: "Successfully updated leaderboard", - }; - } + return { + message: "Successfully updated leaderboard", + }; } async function createIndex(key: string): Promise { diff --git a/backend/src/dal/public.ts b/backend/src/dal/public.ts index f89225c26..6b14688ff 100644 --- a/backend/src/dal/public.ts +++ b/backend/src/dal/public.ts @@ -7,7 +7,7 @@ export async function updateStats( time: number ): Promise { await db.collection("public").updateOne( - { type: "stats" }, + { _id: "stats" }, { $inc: { testsCompleted: 1, @@ -29,9 +29,10 @@ export async function getSpeedHistogram( mode2: string ): Promise> { const key = `${language}_${mode}_${mode2}`; + const stats = await db .collection("public") - .findOne({ type: "speedStats" }, { projection: { [key]: 1 } }); + .findOne({ _id: "speedStatsHistogram" }, { projection: { [key]: 1 } }); return stats?.[key] ?? {}; } @@ -39,7 +40,7 @@ export async function getSpeedHistogram( export async function getTypingStats(): Promise { const stats = await db .collection("public") - .findOne({ type: "stats" }, { projection: { _id: 0 } }); + .findOne({ _id: "stats" }, { projection: { _id: 0 } }); if (!stats) { throw new MonkeyError( 404, diff --git a/backend/src/types/types.d.ts b/backend/src/types/types.d.ts index 68b14e718..7af5568b6 100644 --- a/backend/src/types/types.d.ts +++ b/backend/src/types/types.d.ts @@ -362,17 +362,16 @@ declare namespace MonkeyTypes { } interface PublicStats { - _id: string; + _id: "stats"; testsCompleted: number; testsStarted: number; timeTyping: number; - type: string; } - type PublicSpeedStats = TypedMongoEntry & PublicSpeedStatsByLanguage; - interface TypedMongoEntry { - _id: string; - type: "speedStats"; + type PublicSpeedStats = PublicSpeedStatsMongoEntry & + PublicSpeedStatsByLanguage; + interface PublicSpeedStatsMongoEntry { + _id: "speedStatsHistogram"; } interface PublicSpeedStatsByLanguage { [language_mode_mode2: string]: Record;