const lex = require('../../src/services/search/services/lex.js');
describe("Lexer fulltext", () => {
    // Helper: lex a query and return just the fulltext token strings,
    // discarding each token's position/quote metadata.
    const fulltext = query => lex(query).fulltextTokens.map(t => t.token);

    it("simple lexing", () => {
        expect(fulltext("hello world")).toEqual(["hello", "world"]);
    });

    it("use quotes to keep words together", () => {
        // Single, double and back quotes all group words into a single token.
        expect(fulltext("'hello world' my friend")).toEqual(["hello world", "my", "friend"]);

        expect(fulltext('"hello world" my friend')).toEqual(["hello world", "my", "friend"]);

        expect(fulltext('`hello world` my friend')).toEqual(["hello world", "my", "friend"]);
    });

    it("you can use different quotes and other special characters inside quotes", () => {
        expect(fulltext("'i can use \" or ` or #~=*' without problem"))
            .toEqual(["i can use \" or ` or #~=*", "without", "problem"]);
    });

    it("I can use backslash to escape quotes", () => {
        // Escaped quotes in plain (unquoted) text become literal characters.
        expect(fulltext("hello \\\"world\\\"")).toEqual(["hello", '"world"']);

        expect(fulltext("hello \\\'world\\\'")).toEqual(["hello", "'world'"]);

        expect(fulltext("hello \\\`world\\\`")).toEqual(["hello", '`world`']);

        // Escaped quotes inside a quoted section of the same quote type
        // do not terminate the quoted token.
        expect(fulltext('"hello \\\"world\\\"')).toEqual(['hello "world"']);

        expect(fulltext("'hello \\\'world\\\''")).toEqual(["hello 'world'"]);

        expect(fulltext("`hello \\\`world\\\``")).toEqual(["hello `world`"]);

        // Escaping also neutralizes the attribute prefix character.
        expect(fulltext("\\#token")).toEqual(["#token"]);
    });

    it("quote inside a word does not have a special meaning", () => {
        const result = lex("d'Artagnan is dead #hero = d'Artagnan");

        expect(result.fulltextTokens.map(t => t.token))
            .toEqual(["d'artagnan", "is", "dead"]);

        expect(result.expressionTokens.map(t => t.token))
            .toEqual(['#hero', '=', "d'artagnan"]);
    });

    it("if quote is not ended then it's just one long token", () => {
        expect(fulltext("'unfinished quote")).toEqual(["unfinished quote"]);
    });

    it("parenthesis and symbols in fulltext section are just normal characters", () => {
        expect(fulltext("what's u=p <b(r*t)h>")).toEqual(["what's", "u=p", "<b(r*t)h>"]);
    });

    it("operator characters in expressions are separate tokens", () => {
        expect(lex("# abc+=-def**-+d").expressionTokens.map(t => t.token))
            .toEqual(["#", "abc", "+=-", "def", "**-+", "d"]);
    });

    it("escaping special characters", () => {
        expect(fulltext("hello \\#\\~\\'")).toEqual(["hello", "#~'"]);
    });
});
describe("Lexer expression", () => {
    // Helper: lex a query and return just the expression token strings,
    // discarding each token's position/quote metadata.
    const expression = query => lex(query).expressionTokens.map(t => t.token);

    it("simple attribute existence", () => {
        expect(expression("#label ~relation")).toEqual(["#label", "~relation"]);
    });

    it("simple label operators", () => {
        expect(expression("#label*=*text")).toEqual(["#label", "*=*", "text"]);
    });

    it("simple label operator with in quotes and without", () => {
        // Quoted values carry inQuotes=true, and their start/end indexes
        // point at the value itself (the quote characters are excluded).
        expect(lex("#label*=*'text'").expressionTokens)
            .toEqual([
                {token: "#label", inQuotes: false, startIndex: 0, endIndex: 5},
                {token: "*=*", inQuotes: false, startIndex: 6, endIndex: 8},
                {token: "text", inQuotes: true, startIndex: 10, endIndex: 13}
            ]);

        expect(lex("#label*=*text").expressionTokens)
            .toEqual([
                {token: "#label", inQuotes: false, startIndex: 0, endIndex: 5},
                {token: "*=*", inQuotes: false, startIndex: 6, endIndex: 8},
                {token: "text", inQuotes: false, startIndex: 9, endIndex: 12}
            ]);
    });

    it("note. prefix also separates fulltext from expression", () => {
        expect(expression(`hello fulltext note.labels.capital = Prague`))
            .toEqual(["note", ".", "labels", ".", "capital", "=", "prague"]);
    });

    // FIX: title previously read "will note start expression" — typo for "will not".
    it("note. prefix in quotes will not start expression", () => {
        expect(expression(`hello fulltext "note.txt"`)).toEqual([]);

        expect(lex(`hello fulltext "note.txt"`).fulltextTokens.map(t => t.token))
            .toEqual(["hello", "fulltext", "note.txt"]);
    });

    it("complex expressions with and, or and parenthesis", () => {
        // Keyword operators are lowercased by the lexer.
        expect(expression(`# (#label=text OR #second=text) AND ~relation`))
            .toEqual(["#", "(", "#label", "=", "text", "or", "#second", "=", "text", ")", "and", "~relation"]);
    });

    it("dot separated properties", () => {
        expect(expression(`# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'`))
            .toEqual(["#", "~author", ".", "title", "=", "hugh howey", "and", "note", ".", "book title", "=", "silo"]);
    });

    it("negation of label and relation", () => {
        // The "!" negation marker stays attached to the attribute token.
        expect(expression(`#!capital ~!neighbor`)).toEqual(["#!capital", "~!neighbor"]);
    });

    it("negation of sub-expression", () => {
        expect(expression(`# not(#capital) and note.noteId != "root"`))
            .toEqual(["#", "not", "(", "#capital", ")", "and", "note", ".", "noteid", "!=", "root"]);
    });
});
describe("Lexer invalid queries and edge cases", () => {
    it("concatenated attributes", () => {
        // No whitespace between attributes — the lexer still splits them.
        const tokens = lex("#label~relation").expressionTokens.map(t => t.token);

        expect(tokens).toEqual(["#label", "~relation"]);
    });

    it("trailing escape \\", () => {
        // A dangling backslash at the end of the query is kept verbatim.
        const tokens = lex('abc \\').fulltextTokens.map(t => t.token);

        expect(tokens).toEqual(["abc", "\\"]);
    });
});