From 1346dd669c5050e37df43d6487abf2dcaa5c1ef7 Mon Sep 17 00:00:00 2001 From: Bruce Berrios <58147810+Bruception@users.noreply.github.com> Date: Fri, 24 Jun 2022 05:17:43 -0400 Subject: [PATCH] Tune profanity detection (#3214) * Tune profanity detection * Remove console log * Remove profanity * Add tests for profanity * Fix test case --- backend/__tests__/utils/validation.spec.ts | 124 +++++++++++++++++++++ backend/src/constants/profanities.ts | 9 -- backend/src/utils/validation.ts | 9 +- 3 files changed, 131 insertions(+), 11 deletions(-) create mode 100644 backend/__tests__/utils/validation.spec.ts diff --git a/backend/__tests__/utils/validation.spec.ts b/backend/__tests__/utils/validation.spec.ts new file mode 100644 index 000000000..ccef9bd5b --- /dev/null +++ b/backend/__tests__/utils/validation.spec.ts @@ -0,0 +1,124 @@ +import * as Validation from "../../src/utils/validation"; + +describe("Validation", () => { + it("inRange", () => { + const testCases = [ + { + value: 1, + min: 1, + max: 2, + expected: true, + }, + { + value: 1, + min: 2, + max: 2, + expected: false, + }, + { + value: 1, + min: 1, + max: 1, + expected: true, + }, + { + value: 53, + min: -100, + max: 100, + expected: true, + }, + { + value: 153, + min: -100, + max: 100, + expected: false, + }, + ]; + + testCases.forEach((testCase) => { + expect( + Validation.inRange(testCase.value, testCase.min, testCase.max) + ).toBe(testCase.expected); + }); + }); + + it("isUsernameValid", () => { + const testCases = [ + { + name: "Miodec", + expected: false, + }, + { + name: "fucker", + expected: false, + }, + { + name: "Bruce", + expected: true, + }, + { + name: "Rizwan_123", + expected: true, + }, + { + name: "Fe-rotiq._123._", + expected: true, + }, + { + name: " ", + expected: false, + }, + { + name: "", + expected: false, + }, + { + name: "superduperlongnamethatshouldbeinvalid", + expected: false, + }, + { + name: ".period", + expected: false, + }, + { + name: "fucking_profane", + expected: false, + }, + ]; + + testCases.forEach((testCase) => { + expect(Validation.isUsernameValid(testCase.name)).toBe(testCase.expected); + }); + }); + + it("containsProfanity", () => { + const testCases = [ + { + text: "https://www.fuckyou.com", + expected: true, + }, + { + text: "Hello world!", + expected: false, + }, + { + text: "I fucking hate you", + expected: true, + }, + { + text: "I love you", + expected: false, + }, + { + text: "\n.fuck!", + expected: true, + }, + ]; + + testCases.forEach((testCase) => { + expect(Validation.containsProfanity(testCase.text)).toBe( + testCase.expected + ); + }); + }); +}); diff --git a/backend/src/constants/profanities.ts b/backend/src/constants/profanities.ts index 09e26620c..3241be630 100644 --- a/backend/src/constants/profanities.ts +++ b/backend/src/constants/profanities.ts @@ -92,16 +92,13 @@ const profanities = [ "fukker", "fukkin", "g00k", - "gay", "gayboy", "gaygirl", - "gays", "gayz", "god-damned", "h00r", "h0ar", "h0re", - "hells", "hoar", "hoor", "hoore", @@ -118,7 +115,6 @@ const profanities = [ "kunt", "kunts", "kuntz", - "lesbian", "lezzian", "lipshits", "lipshitz", @@ -177,9 +173,6 @@ const profanities = [ "pussy", "puuke", "puuker", - "queer", - "queers", - "queerz", "qweers", "qweerz", "qweir", @@ -269,7 +262,6 @@ const profanities = [ "kawk", "l3itch", "l3i+ch", - "lesbian", "masturbate", "masterbat", "masterbat3", @@ -350,7 +342,6 @@ const profanities = [ "fu(", "fuk", "futkretzn", - "gay", "gook", "guiena", "h0r", diff --git a/backend/src/utils/validation.ts b/backend/src/utils/validation.ts index 6b20640b8..bae616938 100644 --- a/backend/src/utils/validation.ts +++ b/backend/src/utils/validation.ts @@ -1,6 +1,6 @@ import _ from "lodash"; import profanities from "../constants/profanities"; -import { matchesAPattern } from "./misc"; +import { matchesAPattern, sanitizeString } from "./misc"; export function inRange(value: number, min: number, max: number): boolean { return value >= min && value <= max; @@ -31,7 +31,12 @@ export function isUsernameValid(name: string): boolean { } export function containsProfanity(text: string): boolean { - const normalizedText = text.toLowerCase().split(" "); + const normalizedText = text + .toLowerCase() + .split(/[.,"/#!?$%^&*;:{}=\-_`~()\s\n]+/g) + .map((str) => { + return sanitizeString(str) ?? ""; + }); const hasProfanity = profanities.some((profanity) => { const normalizedProfanity = _.escapeRegExp(profanity.toLowerCase());