From d92dc66e301d91ddb704867367f6a930ead25452 Mon Sep 17 00:00:00 2001 From: Alf Eaton Date: Fri, 1 Sep 2023 12:09:11 +0100 Subject: [PATCH] Protect special characters in pasted HTML (#14476) GitOrigin-RevId: 7288f6696ed9af78c1ea4cc94d0e8022da90aacf --- .../extensions/visual/paste-html.ts | 46 +++++++++++++++++++ ...demirror-editor-visual-paste-html.spec.tsx | 33 +++++++++++++ 2 files changed, 79 insertions(+) diff --git a/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts b/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts index d6fb4e3c2e..739944f8c7 100644 --- a/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts +++ b/services/web/frontend/js/features/source-editor/extensions/visual/paste-html.ts @@ -99,6 +99,9 @@ const htmlToLaTeX = (documentElement: HTMLElement) => { // pre-process table elements processTables(documentElement) + // protect special characters in non-LaTeX text nodes + protectSpecialCharacters(documentElement) + processMatchedElements(documentElement) const text = documentElement.textContent @@ -124,6 +127,49 @@ const processWhitespace = (documentElement: HTMLElement) => { } } +const isElementNode = (node: Node): node is HTMLElement => + node.nodeType === Node.ELEMENT_NODE + +// TODO: negative lookbehind once Safari supports it +const specialCharacterRegExp = /(^|[^\\])([#$%&~_^\\{}])/g + +const specialCharacterReplacer = ( + _match: string, + prefix: string, + char: string +) => { + if (char === '\\') { + // convert `\` to `\textbackslash{}`, preserving subsequent whitespace + char = 'textbackslash{}' + } + + return `${prefix}\\${char}` +} + +const protectSpecialCharacters = (documentElement: HTMLElement) => { + const walker = document.createTreeWalker( + documentElement, + NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT, + node => + isElementNode(node) && node.tagName === 'CODE' + ? NodeFilter.FILTER_REJECT + : NodeFilter.FILTER_ACCEPT + ) + + for (let node = walker.nextNode(); node; node = walker.nextNode()) { + if (node.nodeType === Node.TEXT_NODE) { + const text = node.textContent + if (text) { + // replace non-backslash-prefixed characters + node.textContent = text.replaceAll( + specialCharacterRegExp, + specialCharacterReplacer + ) + } + } + } +} + const processMatchedElements = (documentElement: HTMLElement) => { for (const item of selectors) { for (const element of documentElement.querySelectorAll( diff --git a/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx b/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx index 32ebc48313..ba78917323 100644 --- a/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx +++ b/services/web/test/frontend/features/source-editor/components/codemirror-editor-visual-paste-html.spec.tsx @@ -363,4 +363,37 @@ describe(' paste HTML in Visual mode', function () { cy.get('@content').should('have.text', 'foo') cy.get('.ol-cm-command-textbf').should('have.length', 0) }) + + it('protects special characters', function () { + mountEditor() + + const data = 'foo & bar~baz' + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', data) + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should('have.text', 'foo & bar~baz') + cy.get('.ol-cm-character').should('have.length', 2) + }) + + it('does not protect special characters in code blocks', function () { + mountEditor() + + const data = 'foo & bar~baz \\textbf{foo}' + + const clipboardData = new DataTransfer() + clipboardData.setData('text/html', data) + cy.get('@content').trigger('paste', { clipboardData }) + + cy.get('@content').should( + 'have.text', + 'foo & bar~baz \\verb|\\textbf{foo}|' + ) + + cy.get('.cm-line').eq(0).type('{Enter}') + cy.get('@content').should('have.text', 'foo & bar~baz \\textbf{foo}') + cy.get('.ol-cm-character').should('have.length', 2) + cy.get('.ol-cm-command-verb').should('have.length', 1) + }) })