From ddfdafd54c95b86fc35aa2b18b1dc93d09f7d68a Mon Sep 17 00:00:00 2001 From: Mathias Jakobsen Date: Tue, 25 Jun 2024 09:09:05 +0100 Subject: [PATCH] Merge pull request #19050 from overleaf/mj-lezer-parse-comments-in-unknown-commands [lezer] Introduce hasMoreArguments token for argument parsing GitOrigin-RevId: a5898a2be01c19a39de15c784f184fe61140799a --- .../source-editor/lezer-latex/latex.grammar | 5 ++-- .../source-editor/lezer-latex/tokens.mjs | 23 +++++++++++++------ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/services/web/frontend/js/features/source-editor/lezer-latex/latex.grammar b/services/web/frontend/js/features/source-editor/lezer-latex/latex.grammar index b2853905ed..9ed0073b97 100644 --- a/services/web/frontend/js/features/source-editor/lezer-latex/latex.grammar +++ b/services/web/frontend/js/features/source-editor/lezer-latex/latex.grammar @@ -694,12 +694,13 @@ MathCommand { | KnownCtrlSym } -@external tokens endOfArgumentListTokenizer from "./tokens.mjs" { +@external tokens argumentListTokenizer from "./tokens.mjs" { + hasMoreArguments, endOfArguments } MathUnknownCommand { - CtrlSeq (optionalWhitespace? MathArgument)* optionalWhitespace? endOfArguments + CtrlSeq (hasMoreArguments optionalWhitespace? MathArgument)* endOfArguments | CtrlSym } diff --git a/services/web/frontend/js/features/source-editor/lezer-latex/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-latex/tokens.mjs index 84a86da6f6..25ae9d83fd 100644 --- a/services/web/frontend/js/features/source-editor/lezer-latex/tokens.mjs +++ b/services/web/frontend/js/features/source-editor/lezer-latex/tokens.mjs @@ -81,8 +81,11 @@ import { MultiColumnCtrlSeq, // Marker for end of argument lists endOfArguments, + hasMoreArguments, } from './latex.terms.mjs' +const MAX_ARGUMENT_LOOKAHEAD = 100 + function nameChar(ch) { // we accept A-Z a-z 0-9 * + @ in environment names return ( @@ -256,14 +259,20 @@ const CHAR_TAB = _char('\t') const CHAR_SPACE = _char(' ') const CHAR_NEWLINE = _char('\n') -export const endOfArgumentListTokenizer = new ExternalTokenizer( +export const argumentListTokenizer = new ExternalTokenizer( input => { - const { next } = input - if (next === CHAR_SPACE || next === CHAR_TAB) { - return - } - if (next !== CHAR_OPEN_BRACE) { - input.acceptToken(endOfArguments) + for (let i = 0; i < MAX_ARGUMENT_LOOKAHEAD; ++i) { + const next = input.peek(i) + if (next === CHAR_SPACE || next === CHAR_TAB) { + continue + } + if (next === CHAR_OPEN_BRACE) { + input.acceptToken(hasMoreArguments) + return + } else { + input.acceptToken(endOfArguments) + return + } } }, { contextual: false, fallback: true }