mirror of
https://github.com/zadam/trilium.git
synced 2025-10-06 21:47:31 +08:00
feat(editor): try to have ckeditor not crash when handling stranger tags
This commit is contained in:
parent
83be42f4ea
commit
ba1c6ba0e1
2 changed files with 440 additions and 10 deletions
295
apps/client/src/widgets/type_widgets/editable_text.test.ts
Normal file
295
apps/client/src/widgets/type_widgets/editable_text.test.ts
Normal file
|
@ -0,0 +1,295 @@
|
|||
import { describe, it, expect } from 'vitest';
|
||||
|
||||
// Mock the EditableTextTypeWidget class to test the escaping methods
|
||||
class MockEditableTextTypeWidget {
|
||||
private escapeGenericTypeSyntax(content: string): string {
|
||||
if (!content) return content;
|
||||
|
||||
try {
|
||||
// Count replacements for debugging
|
||||
let replacementCount = 0;
|
||||
|
||||
// List of known HTML tags that should NOT be escaped
|
||||
const htmlTags = new Set([
|
||||
// Block elements
|
||||
'div', 'p', 'section', 'article', 'nav', 'header', 'footer', 'aside', 'main',
|
||||
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
||||
'ul', 'ol', 'li', 'dl', 'dt', 'dd',
|
||||
'table', 'thead', 'tbody', 'tfoot', 'tr', 'td', 'th', 'caption', 'colgroup', 'col',
|
||||
'form', 'fieldset', 'legend',
|
||||
'blockquote', 'pre', 'figure', 'figcaption',
|
||||
'address', 'hr', 'br',
|
||||
|
||||
// Inline elements
|
||||
'span', 'a', 'strong', 'em', 'b', 'i', 'u', 's', 'del', 'ins',
|
||||
'small', 'mark', 'sub', 'sup',
|
||||
'code', 'kbd', 'samp', 'var',
|
||||
'q', 'cite', 'abbr', 'dfn', 'time',
|
||||
'img', 'picture', 'source',
|
||||
|
||||
// Form elements
|
||||
'input', 'textarea', 'button', 'select', 'option', 'optgroup',
|
||||
'label', 'output', 'progress', 'meter',
|
||||
|
||||
// Media elements
|
||||
'audio', 'video', 'track',
|
||||
'canvas', 'svg',
|
||||
|
||||
// Metadata elements
|
||||
'head', 'title', 'meta', 'link', 'style', 'script', 'noscript',
|
||||
'base',
|
||||
|
||||
// Document structure
|
||||
'html', 'body',
|
||||
|
||||
// Other common elements
|
||||
'iframe', 'embed', 'object', 'param',
|
||||
'details', 'summary', 'dialog',
|
||||
'template', 'slot',
|
||||
'area', 'map',
|
||||
'ruby', 'rt', 'rp',
|
||||
'bdi', 'bdo', 'wbr',
|
||||
'data', 'datalist',
|
||||
'keygen', 'output',
|
||||
'math', 'mi', 'mo', 'mn', 'ms', 'mtext', 'mspace',
|
||||
|
||||
// Custom elements that Trilium uses
|
||||
'includenote'
|
||||
]);
|
||||
|
||||
// More comprehensive escaping strategy:
|
||||
// We'll use a different approach - parse through the content and identify
|
||||
// what looks like HTML vs what looks like generic type syntax
|
||||
|
||||
// First pass: Protect actual HTML tags by temporarily replacing them
|
||||
const htmlProtectionMap = new Map<string, string>();
|
||||
let protectionCounter = 0;
|
||||
|
||||
// Protect complete HTML tags (opening, closing, and self-closing)
|
||||
content = content.replace(/<\/?([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*)?\/?>|<!--[\s\S]*?-->/g, (match, tagName) => {
|
||||
// Check if this is a comment
|
||||
if (match.startsWith('<!--')) {
|
||||
const placeholder = `__HTML_PROTECTED_${protectionCounter++}__`;
|
||||
htmlProtectionMap.set(placeholder, match);
|
||||
return placeholder;
|
||||
}
|
||||
|
||||
// Extract just the tag name (first word after < or </)
|
||||
const actualTagName = tagName?.toLowerCase();
|
||||
|
||||
// Only protect if it's a known HTML tag
|
||||
if (actualTagName && htmlTags.has(actualTagName)) {
|
||||
const placeholder = `__HTML_PROTECTED_${protectionCounter++}__`;
|
||||
htmlProtectionMap.set(placeholder, match);
|
||||
return placeholder;
|
||||
}
|
||||
|
||||
// Not a known HTML tag, leave it for escaping
|
||||
return match;
|
||||
});
|
||||
|
||||
// Second pass: Now escape all remaining angle brackets that weren't protected
|
||||
// These are likely generic type syntax or other non-HTML patterns
|
||||
content = content.replace(/</g, () => {
|
||||
replacementCount++;
|
||||
return '<';
|
||||
});
|
||||
content = content.replace(/>/g, () => {
|
||||
replacementCount++;
|
||||
return '>';
|
||||
});
|
||||
|
||||
// Third pass: Restore the protected HTML tags
|
||||
htmlProtectionMap.forEach((originalHtml, placeholder) => {
|
||||
content = content.replace(placeholder, originalHtml);
|
||||
});
|
||||
|
||||
return content;
|
||||
} catch (error) {
|
||||
return content;
|
||||
}
|
||||
}
|
||||
|
||||
private unescapeGenericTypeSyntax(content: string): string {
|
||||
if (!content) return content;
|
||||
|
||||
// Simply replace all < with < and > with >
|
||||
// This is the correct behavior because CKEditor expects raw HTML
|
||||
// Any entities that should display as literal text need to be double-escaped
|
||||
content = content.replace(/</g, '<');
|
||||
content = content.replace(/>/g, '>');
|
||||
|
||||
return content;
|
||||
}
|
||||
|
||||
testEscape(content: string): string {
|
||||
return this.escapeGenericTypeSyntax(content);
|
||||
}
|
||||
|
||||
testUnescape(content: string): string {
|
||||
return this.unescapeGenericTypeSyntax(content);
|
||||
}
|
||||
}
|
||||
|
||||
describe('EditableTextTypeWidget - Generic Type Escaping', () => {
|
||||
const widget = new MockEditableTextTypeWidget();
|
||||
|
||||
it('should escape generic type syntax with comma after tag name', () => {
|
||||
const input = '<PhaseType,';
|
||||
const expected = '<PhaseType,';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should escape generic type syntax with two type parameters', () => {
|
||||
const input = '<String, PromptTemplate>';
|
||||
const expected = '<String, PromptTemplate>';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should escape nested generic types', () => {
|
||||
const input = 'HashMap<String, List<Item>>';
|
||||
const expected = 'HashMap<String, List<Item>>';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should not escape valid HTML tags', () => {
|
||||
const input = '<div class="test">content</div>';
|
||||
const expected = '<div class="test">content</div>';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should not escape HTML tags with attributes containing commas', () => {
|
||||
const input = '<div data-values="1,2,3">content</div>';
|
||||
const expected = '<div data-values="1,2,3">content</div>';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should handle mixed content with both generics and HTML', () => {
|
||||
const input = 'Code: <String, Type> and HTML: <div>content</div>';
|
||||
const expected = 'Code: <String, Type> and HTML: <div>content</div>';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should properly unescape escaped content', () => {
|
||||
const testCases = [
|
||||
'<String, PromptTemplate>',
|
||||
'<PhaseType,',
|
||||
'<RiskLevel, f32>',
|
||||
'HashMap<String, List<Item>>',
|
||||
'Mixed: <String, Type> and <div>HTML</div>'
|
||||
];
|
||||
|
||||
testCases.forEach(original => {
|
||||
const escaped = widget.testEscape(original);
|
||||
const unescaped = widget.testUnescape(escaped);
|
||||
expect(unescaped).toBe(original);
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle empty or null content', () => {
|
||||
expect(widget.testEscape('')).toBe('');
|
||||
expect(widget.testUnescape('')).toBe('');
|
||||
});
|
||||
|
||||
it('should handle content with multiple generic patterns', () => {
|
||||
const input = `
|
||||
pub struct LlmService {
|
||||
anthropic_client: Option<AnthropicClient>,
|
||||
openai_client: Option<OpenAIClient>,
|
||||
templates: HashMap<String, PromptTemplate>,
|
||||
}
|
||||
`;
|
||||
|
||||
const escaped = widget.testEscape(input);
|
||||
expect(escaped).toContain('Option<AnthropicClient>');
|
||||
expect(escaped).toContain('Option<OpenAIClient>');
|
||||
expect(escaped).toContain('HashMap<String, PromptTemplate>');
|
||||
expect(escaped).not.toContain('Option<AnthropicClient>');
|
||||
|
||||
const unescaped = widget.testUnescape(escaped);
|
||||
expect(unescaped).toBe(input);
|
||||
});
|
||||
|
||||
// Additional test cases for problematic patterns
|
||||
it('should escape Rust Box<dyn patterns', () => {
|
||||
const input = 'Box<dyn';
|
||||
const expected = 'Box<dyn';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should escape Rust trait object syntax', () => {
|
||||
const input = 'Box<dyn Error>';
|
||||
const expected = 'Box<dyn Error>';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should escape complex Rust trait bounds', () => {
|
||||
const input = 'Box<dyn Error + Send + Sync>';
|
||||
const expected = 'Box<dyn Error + Send + Sync>';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should escape incomplete generic syntax with string', () => {
|
||||
const input = '<string,';
|
||||
const expected = '<string,';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should escape C++ style templates', () => {
|
||||
const testCases = [
|
||||
{ input: 'std::vector<int>', expected: 'std::vector<int>' },
|
||||
{ input: 'std::map<string, vector<int>>', expected: 'std::map<string, vector<int>>' },
|
||||
{ input: 'unique_ptr<Widget>', expected: 'unique_ptr<Widget>' }
|
||||
];
|
||||
|
||||
testCases.forEach(({ input, expected }) => {
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle edge cases with standalone angle brackets', () => {
|
||||
const testCases = [
|
||||
{ input: '<', expected: '<' },
|
||||
{ input: '>', expected: '>' },
|
||||
{ input: '< >', expected: '< >' },
|
||||
{ input: '<>', expected: '<>' },
|
||||
{ input: '<<>>', expected: '<<>>' },
|
||||
{ input: 'a < b && c > d', expected: 'a < b && c > d' }
|
||||
];
|
||||
|
||||
testCases.forEach(({ input, expected }) => {
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
it('should preserve HTML comments', () => {
|
||||
const input = '<!-- This is a comment with <generics> -->';
|
||||
const expected = '<!-- This is a comment with <generics> -->';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should handle pre-escaped content correctly', () => {
|
||||
// If content already has HTML entities, they get preserved during escaping
|
||||
// (they don't match our angle bracket patterns)
|
||||
const input = 'Already escaped: <String> and new: <Integer>';
|
||||
const expected = 'Already escaped: <String> and new: <Integer>';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
|
||||
// When unescaping, ALL < and > entities get unescaped
|
||||
// This is correct behavior because CKEditor expects raw HTML
|
||||
const unescaped = widget.testUnescape(expected);
|
||||
expect(unescaped).toBe('Already escaped: <String> and new: <Integer>');
|
||||
});
|
||||
|
||||
it('should handle HTML with inline code containing generics', () => {
|
||||
const input = '<p>Use <code>Vec<T></code> for dynamic arrays</p>';
|
||||
const expected = '<p>Use <code>Vec<T></code> for dynamic arrays</p>';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
|
||||
it('should handle self-closing HTML tags', () => {
|
||||
const input = '<img src="test.jpg" /><br/><CustomType>';
|
||||
const expected = '<img src="test.jpg" /><br/><CustomType>';
|
||||
expect(widget.testEscape(input)).toBe(expected);
|
||||
});
|
||||
});
|
|
@ -14,6 +14,7 @@ import type FNote from "../../entities/fnote.js";
|
|||
import { PopupEditor, ClassicEditor, EditorWatchdog, type CKTextEditor, type MentionFeed, type WatchdogConfig, EditorConfig } from "@triliumnext/ckeditor5";
|
||||
import "@triliumnext/ckeditor5/index.css";
|
||||
import { updateTemplateCache } from "./ckeditor/snippets.js";
|
||||
import { SANITIZER_DEFAULT_ALLOWED_TAGS } from "@triliumnext/commons";
|
||||
|
||||
const TPL = /*html*/`
|
||||
<div class="note-detail-editable-text note-detail-printable">
|
||||
|
@ -239,12 +240,26 @@ export default class EditableTextTypeWidget extends AbstractTextTypeWidget {
|
|||
const blob = await note.getBlob();
|
||||
|
||||
await this.spacedUpdate.allowUpdateWithoutChange(async () => {
|
||||
const data = blob?.content || "";
|
||||
const newContentLanguage = this.note?.getLabelValue("language");
|
||||
if (this.contentLanguage !== newContentLanguage) {
|
||||
await this.reinitializeWithData(data);
|
||||
} else {
|
||||
this.watchdog.editor?.setData(data);
|
||||
let data = blob?.content || "";
|
||||
|
||||
try {
|
||||
// Escape generic type syntax that could be mistaken for HTML tags
|
||||
data = this.escapeGenericTypeSyntax(data);
|
||||
|
||||
const newContentLanguage = this.note?.getLabelValue("language");
|
||||
if (this.contentLanguage !== newContentLanguage) {
|
||||
await this.reinitializeWithData(data);
|
||||
} else {
|
||||
this.watchdog.editor?.setData(data);
|
||||
}
|
||||
} catch (error) {
|
||||
logError(`Failed to set editor data for note ${note.noteId}: ${error}`);
|
||||
// Try to set the data without escaping as a fallback
|
||||
try {
|
||||
this.watchdog.editor?.setData(blob?.content || "");
|
||||
} catch (fallbackError) {
|
||||
logError(`Fallback also failed: ${fallbackError}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -255,7 +270,10 @@ export default class EditableTextTypeWidget extends AbstractTextTypeWidget {
|
|||
return;
|
||||
}
|
||||
|
||||
const content = this.watchdog.editor?.getData() ?? "";
|
||||
let content = this.watchdog.editor?.getData() ?? "";
|
||||
|
||||
// Unescape any generic type syntax we escaped earlier
|
||||
content = this.unescapeGenericTypeSyntax(content);
|
||||
|
||||
// if content is only tags/whitespace (typically <p> </p>), then just make it empty,
|
||||
// this is important when setting a new note to code
|
||||
|
@ -488,9 +506,23 @@ export default class EditableTextTypeWidget extends AbstractTextTypeWidget {
|
|||
return;
|
||||
}
|
||||
|
||||
this.watchdog.destroy();
|
||||
await this.createEditor();
|
||||
this.watchdog.editor?.setData(data);
|
||||
try {
|
||||
this.watchdog.destroy();
|
||||
await this.createEditor();
|
||||
// Data should already be escaped when this is called from doRefresh
|
||||
// but we ensure it's escaped in case this is called from elsewhere
|
||||
const escapedData = data.includes('<') ? data : this.escapeGenericTypeSyntax(data);
|
||||
this.watchdog.editor?.setData(escapedData);
|
||||
} catch (error) {
|
||||
logError(`Failed to reinitialize editor with data: ${error}`);
|
||||
// Try to create editor without data and set it later
|
||||
try {
|
||||
await this.createEditor();
|
||||
this.watchdog.editor?.setData("");
|
||||
} catch (fallbackError) {
|
||||
logError(`Failed to create empty editor: ${fallbackError}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async reinitialize() {
|
||||
|
@ -530,6 +562,109 @@ export default class EditableTextTypeWidget extends AbstractTextTypeWidget {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Escapes generic type syntax (e.g., <String, Type>) that could be mistaken for HTML tags.
|
||||
* This prevents CKEditor from trying to parse them as DOM elements.
|
||||
*/
|
||||
private escapeGenericTypeSyntax(content: string): string {
|
||||
if (!content) return content;
|
||||
|
||||
try {
|
||||
// Count replacements for debugging
|
||||
let replacementCount = 0;
|
||||
|
||||
// Get the allowed HTML tags from user settings, with fallback to default list
|
||||
let allowedTags;
|
||||
try {
|
||||
const allowedHtmlTagsOption = options.get("allowedHtmlTags");
|
||||
allowedTags = allowedHtmlTagsOption ? JSON.parse(allowedHtmlTagsOption) : SANITIZER_DEFAULT_ALLOWED_TAGS;
|
||||
} catch (e) {
|
||||
// Fallback to default list if option doesn't exist or is invalid JSON
|
||||
allowedTags = SANITIZER_DEFAULT_ALLOWED_TAGS;
|
||||
}
|
||||
|
||||
// Convert to lowercase for case-insensitive comparison
|
||||
const htmlTags = new Set(
|
||||
allowedTags.map((tag: string) => tag.toLowerCase())
|
||||
);
|
||||
|
||||
// Add custom Trilium element that must be preserved
|
||||
htmlTags.add('includenote');
|
||||
|
||||
// More comprehensive escaping strategy:
|
||||
// We'll use a different approach - parse through the content and identify
|
||||
// what looks like HTML vs what looks like generic type syntax
|
||||
|
||||
// First pass: Protect actual HTML tags by temporarily replacing them
|
||||
const htmlProtectionMap = new Map<string, string>();
|
||||
let protectionCounter = 0;
|
||||
|
||||
// Protect complete HTML tags (opening, closing, and self-closing)
|
||||
content = content.replace(/<\/?([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*)?\/?>|<!--[\s\S]*?-->/g, (match, tagName) => {
|
||||
// Check if this is a comment
|
||||
if (match.startsWith('<!--')) {
|
||||
const placeholder = `__HTML_PROTECTED_${protectionCounter++}__`;
|
||||
htmlProtectionMap.set(placeholder, match);
|
||||
return placeholder;
|
||||
}
|
||||
|
||||
// Extract just the tag name (first word after < or </)
|
||||
const actualTagName = tagName?.toLowerCase();
|
||||
|
||||
// Only protect if it's a known HTML tag
|
||||
if (actualTagName && htmlTags.has(actualTagName)) {
|
||||
const placeholder = `__HTML_PROTECTED_${protectionCounter++}__`;
|
||||
htmlProtectionMap.set(placeholder, match);
|
||||
return placeholder;
|
||||
}
|
||||
|
||||
// Not a known HTML tag, leave it for escaping
|
||||
return match;
|
||||
});
|
||||
|
||||
// Second pass: Now escape all remaining angle brackets that weren't protected
|
||||
// These are likely generic type syntax or other non-HTML patterns
|
||||
content = content.replace(/</g, () => {
|
||||
replacementCount++;
|
||||
return '<';
|
||||
});
|
||||
content = content.replace(/>/g, () => {
|
||||
replacementCount++;
|
||||
return '>';
|
||||
});
|
||||
|
||||
// Third pass: Restore the protected HTML tags
|
||||
htmlProtectionMap.forEach((originalHtml, placeholder) => {
|
||||
content = content.replace(placeholder, originalHtml);
|
||||
});
|
||||
|
||||
if (replacementCount > 0) {
|
||||
logInfo(`Escaped ${replacementCount} potential generic type patterns in note content`);
|
||||
}
|
||||
|
||||
return content;
|
||||
} catch (error) {
|
||||
logError(`Failed to escape generic type syntax: ${error}`);
|
||||
return content; // Return original content if escaping fails
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unescapes generic type syntax that was previously escaped.
|
||||
* This restores the original content when saving.
|
||||
*/
|
||||
private unescapeGenericTypeSyntax(content: string): string {
|
||||
if (!content) return content;
|
||||
|
||||
// Simply replace all < with < and > with >
|
||||
// This is the correct behavior because CKEditor expects raw HTML
|
||||
// Any entities that should display as literal text need to be double-escaped
|
||||
content = content.replace(/</g, '<');
|
||||
content = content.replace(/>/g, '>');
|
||||
|
||||
return content;
|
||||
}
|
||||
|
||||
buildTouchBarCommand(data: CommandListenerData<"buildTouchBar">) {
|
||||
const { TouchBar, buildIcon } = data;
|
||||
const { TouchBarSegmentedControl, TouchBarGroup, TouchBarButton } = TouchBar;
|
||||
|
|
Loading…
Add table
Reference in a new issue