// Source: trilium/src/public/app/services/attribute_parser.js
// (227 lines, 5.6 KiB, JavaScript)

/**
 * Strips the HTML wrapping from an attribute string before it is tokenized.
 *
 * - removes one leading `<p>` and one trailing `</p>`
 * - turns every `&nbsp;` entity into a plain space
 * - replaces `<a ... href="#path">text</a>` links by the bare `#path` from the href
 *
 * @param {string} str - raw attribute text, possibly wrapped in HTML
 * @returns {string} the text with the wrapping removed
 */
function preprocess(str) {
    let text = str;

    if (text.startsWith('<p>')) {
        text = text.slice(3);
    }

    if (text.endsWith('</p>')) {
        text = text.slice(0, text.length - 4);
    }

    text = text.replace(/&nbsp;/g, " ");

    // only the href (which must start with '#') survives, the link text is dropped
    return text.replace(/<a[^>]+href="(#[A-Za-z0-9/]*)"[^>]*>[^<]*<\/a>/g, "$1");
}
/**
 * Splits an attribute string into tokens.
 *
 * Rules, applied character by character:
 * - a backslash adds the following character to the current word literally
 *   (a trailing backslash is kept as is)
 * - `"`, `'` and `` ` `` open a quoted section which is closed by the same character;
 *   inside it spaces, operators, parentheses and the other quote characters are literal
 * - outside of quotes a space ends the current word, `(` and `)` are tokens of their own,
 *   and a switch between operator symbols (`= * > < !`) and other characters starts a new word
 * - `#` and `~` at the start of a word are taken as part of that word
 *
 * Positions are derived from the length of the token text, so they match the
 * input exactly only when no backslash was dropped from the token.
 *
 * @param {string} str - text to tokenize
 * @returns {Array<{text: string, startIndex: number, endIndex: number}>} tokens in input order;
 *          endIndex is the index of the last character of the token
 */
function lexer(str) {
    const QUOTE_CHARS = ['"', "'", '`'];
    const OPERATOR_CHARS = ['=', '*', '>', '<', '!'];

    const tokens = [];

    // false while outside of quotes, otherwise the character which opened the quoted section
    let quotes = false;
    let currentWord = '';

    function isOperatorSymbol(chr) {
        return OPERATOR_CHARS.includes(chr);
    }

    function previousOperatorSymbol() {
        return currentWord.length > 0
            && isOperatorSymbol(currentWord[currentWord.length - 1]);
    }

    /**
     * @param endIndex - index of the last character of the token
     * @param createAlsoForEmptyWords - push a token even when the current word is empty
     */
    function finishWord(endIndex, createAlsoForEmptyWords = false) {
        if (currentWord === '' && !createAlsoForEmptyWords) {
            return;
        }

        tokens.push({
            text: currentWord,
            startIndex: endIndex - currentWord.length + 1,
            endIndex: endIndex
        });

        currentWord = '';
    }

    for (let i = 0; i < str.length; i++) {
        const chr = str[i];

        if (chr === '\\') {
            if (i + 1 < str.length) {
                // skip the backslash and take the next character literally
                i++;
                currentWord += str[i];
            }
            else {
                currentWord += chr;
            }

            continue;
        }

        if (QUOTE_CHARS.includes(chr)) {
            if (!quotes) {
                // an operator directly before the quote (e.g. =) is a token of its own
                if (previousOperatorSymbol()) {
                    finishWord(i - 1);
                }

                quotes = chr;
            }
            else if (quotes === chr) {
                quotes = false;

                // a closed quote always yields a token so that "" is kept as an empty value
                finishWord(i - 1, true);
            }
            else {
                // it's a quote but within other kind of quotes so it's valid as a literal character
                currentWord += chr;
            }

            continue;
        }

        if (!quotes) {
            if (currentWord.length === 0 && (chr === '#' || chr === '~')) {
                currentWord = chr;
                continue;
            }

            if (chr === ' ') {
                finishWord(i - 1);
                continue;
            }

            if (chr === '(' || chr === ')') {
                finishWord(i - 1);

                currentWord = chr;
                finishWord(i);
                continue;
            }

            // operator symbols and other characters never share a word
            if (previousOperatorSymbol() !== isOperatorSymbol(chr)) {
                finishWord(i - 1);

                currentWord += chr;
                continue;
            }
        }

        currentWord += chr;
    }

    finishWord(str.length - 1);

    return tokens;
}
/**
 * Builds attribute definitions out of lexer tokens.
 *
 * A token starting with `#` is a label, a token starting with `~` is a relation.
 * Either may be followed by the three tokens `(`, `inheritable`, `)` and then by
 * `=` and a value token. For relations the value is reduced to the last segment
 * of the note path (a leading `#` is removed first).
 *
 * @param {Array<{text: string, startIndex: number, endIndex: number}>} tokens - tokens to parse
 * @param {string} str - text the token positions refer to; used only for excerpts in error messages
 * @param {boolean} [allowEmptyRelations=false] - when true, a relation which is not followed by `=`
 *        and a target ends the parsing instead of raising an error; the relation is returned
 *        without a value and the remaining tokens are not processed
 * @returns {Array<{type: string, name: string, isInheritable: boolean, startIndex: number,
 *          endIndex: number, value: (string|undefined)}>} parsed attributes in input order
 * @throws {Error} on a label with `=` but no value, on a relation without target
 *         (unless allowEmptyRelations is set) and on a token which starts neither a label nor a relation
 */
function parser(tokens, str, allowEmptyRelations = false) {
    const attrs = [];

    // excerpt of str around token i (up to 20 characters on each side), for error messages
    function context(i) {
        let {startIndex, endIndex} = tokens[i];
        startIndex = Math.max(0, startIndex - 20);
        endIndex = Math.min(str.length, endIndex + 20);

        return '"' + (startIndex !== 0 ? "..." : "")
            + str.slice(startIndex, endIndex)
            + (endIndex !== str.length ? "..." : "") + '"';
    }

    // true when the token at index is directly followed by "(", "inheritable", ")"
    function hasInheritableMarker(index) {
        return tokens.length > index + 3
            && tokens[index + 1].text === '('
            && tokens[index + 2].text === 'inheritable'
            && tokens[index + 3].text === ')';
    }

    for (let i = 0; i < tokens.length; i++) {
        const {text, startIndex} = tokens[i];

        const isLabel = text.startsWith('#');
        const isRelation = !isLabel && text.startsWith('~');

        if (!isLabel && !isRelation) {
            throw new Error(`Unrecognized attribute "${text}" in ${context(i)}`);
        }

        const isInheritable = hasInheritableMarker(i);

        if (isInheritable) {
            // step over the marker so that i points to its closing parenthesis
            i += 3;
        }

        const attr = {
            type: isLabel ? 'label' : 'relation',
            name: text.slice(1),
            isInheritable: isInheritable,
            startIndex: startIndex,
            endIndex: tokens[i].endIndex // i could have been moved past the inheritable marker
        };

        if (isLabel) {
            if (i + 1 < tokens.length && tokens[i + 1].text === "=") {
                if (i + 2 >= tokens.length) {
                    throw new Error(`Missing value for label "${text}" in ${context(i)}`);
                }

                i += 2;

                attr.value = tokens[i].text;
                attr.endIndex = tokens[i].endIndex;
            }

            attrs.push(attr);
            continue;
        }

        // relation: pushed before the target check so that it is kept when parsing stops below
        attrs.push(attr);

        if (i + 2 >= tokens.length || tokens[i + 1].text !== '=') {
            if (allowEmptyRelations) {
                break;
            }

            throw new Error(`Relation "${text}" in ${context(i)} should point to a note.`);
        }

        i += 2;

        let notePath = tokens[i].text;

        if (notePath.startsWith("#")) {
            notePath = notePath.slice(1);
        }

        // only the last path segment is stored as the value
        attr.value = notePath.split('/').pop();
        attr.endIndex = tokens[i].endIndex;
    }

    return attrs;
}
/**
 * Convenience entry point: runs preprocess(), lexer() and parser() on the given text.
 *
 * @param {string} str - raw attribute text
 * @param {boolean} [allowEmptyRelations=false] - passed through to parser()
 * @returns {Array<Object>} whatever parser() returns for the preprocessed text
 */
function lexAndParse(str, allowEmptyRelations = false) {
    // the parser gets the preprocessed text because the token positions refer to it
    const text = preprocess(str);
    const tokens = lexer(text);

    return parser(tokens, text, allowEmptyRelations);
}
// Public API of the module: the individual stages plus the combined entry point.
export default {
    preprocess,
    lexer,
    parser,
    lexAndParse
};