feat(editor): try to have ckeditor not crash when handling stranger tags

This commit is contained in:
perf3ct 2025-09-01 20:39:02 -07:00
parent 83be42f4ea
commit ba1c6ba0e1
No known key found for this signature in database
GPG key ID: 569C4EEC436F5232
2 changed files with 440 additions and 10 deletions

View file

@ -0,0 +1,295 @@
import { describe, it, expect } from 'vitest';
// Mock the EditableTextTypeWidget class to test the escaping methods
class MockEditableTextTypeWidget {
private escapeGenericTypeSyntax(content: string): string {
if (!content) return content;
try {
// Count replacements for debugging
let replacementCount = 0;
// List of known HTML tags that should NOT be escaped
const htmlTags = new Set([
// Block elements
'div', 'p', 'section', 'article', 'nav', 'header', 'footer', 'aside', 'main',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
'ul', 'ol', 'li', 'dl', 'dt', 'dd',
'table', 'thead', 'tbody', 'tfoot', 'tr', 'td', 'th', 'caption', 'colgroup', 'col',
'form', 'fieldset', 'legend',
'blockquote', 'pre', 'figure', 'figcaption',
'address', 'hr', 'br',
// Inline elements
'span', 'a', 'strong', 'em', 'b', 'i', 'u', 's', 'del', 'ins',
'small', 'mark', 'sub', 'sup',
'code', 'kbd', 'samp', 'var',
'q', 'cite', 'abbr', 'dfn', 'time',
'img', 'picture', 'source',
// Form elements
'input', 'textarea', 'button', 'select', 'option', 'optgroup',
'label', 'output', 'progress', 'meter',
// Media elements
'audio', 'video', 'track',
'canvas', 'svg',
// Metadata elements
'head', 'title', 'meta', 'link', 'style', 'script', 'noscript',
'base',
// Document structure
'html', 'body',
// Other common elements
'iframe', 'embed', 'object', 'param',
'details', 'summary', 'dialog',
'template', 'slot',
'area', 'map',
'ruby', 'rt', 'rp',
'bdi', 'bdo', 'wbr',
'data', 'datalist',
'keygen', 'output',
'math', 'mi', 'mo', 'mn', 'ms', 'mtext', 'mspace',
// Custom elements that Trilium uses
'includenote'
]);
// More comprehensive escaping strategy:
// We'll use a different approach - parse through the content and identify
// what looks like HTML vs what looks like generic type syntax
// First pass: Protect actual HTML tags by temporarily replacing them
const htmlProtectionMap = new Map<string, string>();
let protectionCounter = 0;
// Protect complete HTML tags (opening, closing, and self-closing)
content = content.replace(/<\/?([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*)?\/?>|<!--[\s\S]*?-->/g, (match, tagName) => {
// Check if this is a comment
if (match.startsWith('<!--')) {
const placeholder = `__HTML_PROTECTED_${protectionCounter++}__`;
htmlProtectionMap.set(placeholder, match);
return placeholder;
}
// Extract just the tag name (first word after < or </)
const actualTagName = tagName?.toLowerCase();
// Only protect if it's a known HTML tag
if (actualTagName && htmlTags.has(actualTagName)) {
const placeholder = `__HTML_PROTECTED_${protectionCounter++}__`;
htmlProtectionMap.set(placeholder, match);
return placeholder;
}
// Not a known HTML tag, leave it for escaping
return match;
});
// Second pass: Now escape all remaining angle brackets that weren't protected
// These are likely generic type syntax or other non-HTML patterns
content = content.replace(/</g, () => {
replacementCount++;
return '&lt;';
});
content = content.replace(/>/g, () => {
replacementCount++;
return '&gt;';
});
// Third pass: Restore the protected HTML tags
htmlProtectionMap.forEach((originalHtml, placeholder) => {
content = content.replace(placeholder, originalHtml);
});
return content;
} catch (error) {
return content;
}
}
private unescapeGenericTypeSyntax(content: string): string {
if (!content) return content;
// Simply replace all &lt; with < and &gt; with >
// This is the correct behavior because CKEditor expects raw HTML
// Any entities that should display as literal text need to be double-escaped
content = content.replace(/&lt;/g, '<');
content = content.replace(/&gt;/g, '>');
return content;
}
testEscape(content: string): string {
return this.escapeGenericTypeSyntax(content);
}
testUnescape(content: string): string {
return this.unescapeGenericTypeSyntax(content);
}
}
describe('EditableTextTypeWidget - Generic Type Escaping', () => {
const widget = new MockEditableTextTypeWidget();
it('should escape generic type syntax with comma after tag name', () => {
const input = '<PhaseType,';
const expected = '&lt;PhaseType,';
expect(widget.testEscape(input)).toBe(expected);
});
it('should escape generic type syntax with two type parameters', () => {
const input = '<String, PromptTemplate>';
const expected = '&lt;String, PromptTemplate&gt;';
expect(widget.testEscape(input)).toBe(expected);
});
it('should escape nested generic types', () => {
const input = 'HashMap<String, List<Item>>';
const expected = 'HashMap&lt;String, List&lt;Item&gt;&gt;';
expect(widget.testEscape(input)).toBe(expected);
});
it('should not escape valid HTML tags', () => {
const input = '<div class="test">content</div>';
const expected = '<div class="test">content</div>';
expect(widget.testEscape(input)).toBe(expected);
});
it('should not escape HTML tags with attributes containing commas', () => {
const input = '<div data-values="1,2,3">content</div>';
const expected = '<div data-values="1,2,3">content</div>';
expect(widget.testEscape(input)).toBe(expected);
});
it('should handle mixed content with both generics and HTML', () => {
const input = 'Code: <String, Type> and HTML: <div>content</div>';
const expected = 'Code: &lt;String, Type&gt; and HTML: <div>content</div>';
expect(widget.testEscape(input)).toBe(expected);
});
it('should properly unescape escaped content', () => {
const testCases = [
'<String, PromptTemplate>',
'<PhaseType,',
'<RiskLevel, f32>',
'HashMap<String, List<Item>>',
'Mixed: <String, Type> and <div>HTML</div>'
];
testCases.forEach(original => {
const escaped = widget.testEscape(original);
const unescaped = widget.testUnescape(escaped);
expect(unescaped).toBe(original);
});
});
it('should handle empty or null content', () => {
expect(widget.testEscape('')).toBe('');
expect(widget.testUnescape('')).toBe('');
});
it('should handle content with multiple generic patterns', () => {
const input = `
pub struct LlmService {
anthropic_client: Option<AnthropicClient>,
openai_client: Option<OpenAIClient>,
templates: HashMap<String, PromptTemplate>,
}
`;
const escaped = widget.testEscape(input);
expect(escaped).toContain('Option&lt;AnthropicClient&gt;');
expect(escaped).toContain('Option&lt;OpenAIClient&gt;');
expect(escaped).toContain('HashMap&lt;String, PromptTemplate&gt;');
expect(escaped).not.toContain('Option<AnthropicClient>');
const unescaped = widget.testUnescape(escaped);
expect(unescaped).toBe(input);
});
// Additional test cases for problematic patterns
it('should escape Rust Box<dyn patterns', () => {
const input = 'Box<dyn';
const expected = 'Box&lt;dyn';
expect(widget.testEscape(input)).toBe(expected);
});
it('should escape Rust trait object syntax', () => {
const input = 'Box<dyn Error>';
const expected = 'Box&lt;dyn Error&gt;';
expect(widget.testEscape(input)).toBe(expected);
});
it('should escape complex Rust trait bounds', () => {
const input = 'Box<dyn Error + Send + Sync>';
const expected = 'Box&lt;dyn Error + Send + Sync&gt;';
expect(widget.testEscape(input)).toBe(expected);
});
it('should escape incomplete generic syntax with string', () => {
const input = '<string,';
const expected = '&lt;string,';
expect(widget.testEscape(input)).toBe(expected);
});
it('should escape C++ style templates', () => {
const testCases = [
{ input: 'std::vector<int>', expected: 'std::vector&lt;int&gt;' },
{ input: 'std::map<string, vector<int>>', expected: 'std::map&lt;string, vector&lt;int&gt;&gt;' },
{ input: 'unique_ptr<Widget>', expected: 'unique_ptr&lt;Widget&gt;' }
];
testCases.forEach(({ input, expected }) => {
expect(widget.testEscape(input)).toBe(expected);
});
});
it('should handle edge cases with standalone angle brackets', () => {
const testCases = [
{ input: '<', expected: '&lt;' },
{ input: '>', expected: '&gt;' },
{ input: '< >', expected: '&lt; &gt;' },
{ input: '<>', expected: '&lt;&gt;' },
{ input: '<<>>', expected: '&lt;&lt;&gt;&gt;' },
{ input: 'a < b && c > d', expected: 'a &lt; b && c &gt; d' }
];
testCases.forEach(({ input, expected }) => {
expect(widget.testEscape(input)).toBe(expected);
});
});
it('should preserve HTML comments', () => {
const input = '<!-- This is a comment with <generics> -->';
const expected = '<!-- This is a comment with <generics> -->';
expect(widget.testEscape(input)).toBe(expected);
});
it('should handle pre-escaped content correctly', () => {
// If content already has HTML entities, they get preserved during escaping
// (they don't match our angle bracket patterns)
const input = 'Already escaped: &lt;String&gt; and new: <Integer>';
const expected = 'Already escaped: &lt;String&gt; and new: &lt;Integer&gt;';
expect(widget.testEscape(input)).toBe(expected);
// When unescaping, ALL &lt; and &gt; entities get unescaped
// This is correct behavior because CKEditor expects raw HTML
const unescaped = widget.testUnescape(expected);
expect(unescaped).toBe('Already escaped: <String> and new: <Integer>');
});
it('should handle HTML with inline code containing generics', () => {
const input = '<p>Use <code>Vec<T></code> for dynamic arrays</p>';
const expected = '<p>Use <code>Vec&lt;T&gt;</code> for dynamic arrays</p>';
expect(widget.testEscape(input)).toBe(expected);
});
it('should handle self-closing HTML tags', () => {
const input = '<img src="test.jpg" /><br/><CustomType>';
const expected = '<img src="test.jpg" /><br/>&lt;CustomType&gt;';
expect(widget.testEscape(input)).toBe(expected);
});
});

View file

@ -14,6 +14,7 @@ import type FNote from "../../entities/fnote.js";
import { PopupEditor, ClassicEditor, EditorWatchdog, type CKTextEditor, type MentionFeed, type WatchdogConfig, EditorConfig } from "@triliumnext/ckeditor5";
import "@triliumnext/ckeditor5/index.css";
import { updateTemplateCache } from "./ckeditor/snippets.js";
import { SANITIZER_DEFAULT_ALLOWED_TAGS } from "@triliumnext/commons";
const TPL = /*html*/`
<div class="note-detail-editable-text note-detail-printable">
@ -239,13 +240,27 @@ export default class EditableTextTypeWidget extends AbstractTextTypeWidget {
const blob = await note.getBlob();
await this.spacedUpdate.allowUpdateWithoutChange(async () => {
const data = blob?.content || "";
let data = blob?.content || "";
try {
// Escape generic type syntax that could be mistaken for HTML tags
data = this.escapeGenericTypeSyntax(data);
const newContentLanguage = this.note?.getLabelValue("language");
if (this.contentLanguage !== newContentLanguage) {
await this.reinitializeWithData(data);
} else {
this.watchdog.editor?.setData(data);
}
} catch (error) {
logError(`Failed to set editor data for note ${note.noteId}: ${error}`);
// Try to set the data without escaping as a fallback
try {
this.watchdog.editor?.setData(blob?.content || "");
} catch (fallbackError) {
logError(`Fallback also failed: ${fallbackError}`);
}
}
});
}
@ -255,7 +270,10 @@ export default class EditableTextTypeWidget extends AbstractTextTypeWidget {
return;
}
const content = this.watchdog.editor?.getData() ?? "";
let content = this.watchdog.editor?.getData() ?? "";
// Unescape any generic type syntax we escaped earlier
content = this.unescapeGenericTypeSyntax(content);
// if content is only tags/whitespace (typically <p>&nbsp;</p>), then just make it empty,
// this is important when setting a new note to code
@ -488,9 +506,23 @@ export default class EditableTextTypeWidget extends AbstractTextTypeWidget {
return;
}
try {
this.watchdog.destroy();
await this.createEditor();
this.watchdog.editor?.setData(data);
// Data should already be escaped when this is called from doRefresh
// but we ensure it's escaped in case this is called from elsewhere
const escapedData = data.includes('&lt;') ? data : this.escapeGenericTypeSyntax(data);
this.watchdog.editor?.setData(escapedData);
} catch (error) {
logError(`Failed to reinitialize editor with data: ${error}`);
// Try to create editor without data and set it later
try {
await this.createEditor();
this.watchdog.editor?.setData("");
} catch (fallbackError) {
logError(`Failed to create empty editor: ${fallbackError}`);
}
}
}
async reinitialize() {
@ -530,6 +562,109 @@ export default class EditableTextTypeWidget extends AbstractTextTypeWidget {
}
}
/**
* Escapes generic type syntax (e.g., <String, Type>) that could be mistaken for HTML tags.
* This prevents CKEditor from trying to parse them as DOM elements.
*/
private escapeGenericTypeSyntax(content: string): string {
if (!content) return content;
try {
// Count replacements for debugging
let replacementCount = 0;
// Get the allowed HTML tags from user settings, with fallback to default list
let allowedTags;
try {
const allowedHtmlTagsOption = options.get("allowedHtmlTags");
allowedTags = allowedHtmlTagsOption ? JSON.parse(allowedHtmlTagsOption) : SANITIZER_DEFAULT_ALLOWED_TAGS;
} catch (e) {
// Fallback to default list if option doesn't exist or is invalid JSON
allowedTags = SANITIZER_DEFAULT_ALLOWED_TAGS;
}
// Convert to lowercase for case-insensitive comparison
const htmlTags = new Set(
allowedTags.map((tag: string) => tag.toLowerCase())
);
// Add custom Trilium element that must be preserved
htmlTags.add('includenote');
// More comprehensive escaping strategy:
// We'll use a different approach - parse through the content and identify
// what looks like HTML vs what looks like generic type syntax
// First pass: Protect actual HTML tags by temporarily replacing them
const htmlProtectionMap = new Map<string, string>();
let protectionCounter = 0;
// Protect complete HTML tags (opening, closing, and self-closing)
content = content.replace(/<\/?([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*)?\/?>|<!--[\s\S]*?-->/g, (match, tagName) => {
// Check if this is a comment
if (match.startsWith('<!--')) {
const placeholder = `__HTML_PROTECTED_${protectionCounter++}__`;
htmlProtectionMap.set(placeholder, match);
return placeholder;
}
// Extract just the tag name (first word after < or </)
const actualTagName = tagName?.toLowerCase();
// Only protect if it's a known HTML tag
if (actualTagName && htmlTags.has(actualTagName)) {
const placeholder = `__HTML_PROTECTED_${protectionCounter++}__`;
htmlProtectionMap.set(placeholder, match);
return placeholder;
}
// Not a known HTML tag, leave it for escaping
return match;
});
// Second pass: Now escape all remaining angle brackets that weren't protected
// These are likely generic type syntax or other non-HTML patterns
content = content.replace(/</g, () => {
replacementCount++;
return '&lt;';
});
content = content.replace(/>/g, () => {
replacementCount++;
return '&gt;';
});
// Third pass: Restore the protected HTML tags
htmlProtectionMap.forEach((originalHtml, placeholder) => {
content = content.replace(placeholder, originalHtml);
});
if (replacementCount > 0) {
logInfo(`Escaped ${replacementCount} potential generic type patterns in note content`);
}
return content;
} catch (error) {
logError(`Failed to escape generic type syntax: ${error}`);
return content; // Return original content if escaping fails
}
}
/**
* Unescapes generic type syntax that was previously escaped.
* This restores the original content when saving.
*/
private unescapeGenericTypeSyntax(content: string): string {
if (!content) return content;
// Simply replace all &lt; with < and &gt; with >
// This is the correct behavior because CKEditor expects raw HTML
// Any entities that should display as literal text need to be double-escaped
content = content.replace(/&lt;/g, '<');
content = content.replace(/&gt;/g, '>');
return content;
}
buildTouchBarCommand(data: CommandListenerData<"buildTouchBar">) {
const { TouchBar, buildIcon } = data;
const { TouchBarSegmentedControl, TouchBarGroup, TouchBarButton } = TouchBar;