From d9cf72056647076739a40ce6449b1f62df2f5ad3 Mon Sep 17 00:00:00 2001 From: Eric Mc Sween <5454374+emcsween@users.noreply.github.com> Date: Wed, 4 Feb 2026 08:20:51 -0500 Subject: [PATCH] Merge pull request #31239 from overleaf/em-bibtex-grammar Improvements to the lezer grammar for BibTeX GitOrigin-RevId: 33ece6f3c6a34380aa7b2a46ff624aff3ccf8a10 --- .../languages/bibtex/bibtex-language.ts | 15 +- .../source-editor/languages/bibtex/linting.ts | 39 ++-- .../source-editor/lezer-bibtex/bibtex.grammar | 201 +++++++++++------- .../source-editor/lezer-bibtex/highlight.mjs | 25 ++- .../source-editor/lezer-bibtex/tokens.mjs | 21 ++ 5 files changed, 189 insertions(+), 112 deletions(-) create mode 100644 services/web/frontend/js/features/source-editor/lezer-bibtex/tokens.mjs diff --git a/services/web/frontend/js/features/source-editor/languages/bibtex/bibtex-language.ts b/services/web/frontend/js/features/source-editor/languages/bibtex/bibtex-language.ts index c9dbdb4d26..02f57ca92e 100644 --- a/services/web/frontend/js/features/source-editor/languages/bibtex/bibtex-language.ts +++ b/services/web/frontend/js/features/source-editor/languages/bibtex/bibtex-language.ts @@ -1,10 +1,21 @@ -import { LRLanguage } from '@codemirror/language' +import { indentNodeProp, LRLanguage } from '@codemirror/language' import { parser } from '../../lezer-bibtex/bibtex.mjs' import { bibtexEntryCompletions } from './completions/snippets' export const BibTeXLanguage = LRLanguage.define({ name: 'bibtex', - parser, + parser: parser.configure({ + props: [ + // Disable the autoindent from delimited nodes + indentNodeProp.add({ + EntryBody: () => null, + StringBody: () => null, + PreambleBody: () => null, + CommentBody: () => null, + StringLiteral: () => null, + }), + ], + }), languageData: { autocomplete: bibtexEntryCompletions, }, diff --git a/services/web/frontend/js/features/source-editor/languages/bibtex/linting.ts b/services/web/frontend/js/features/source-editor/languages/bibtex/linting.ts index 6b92fffc30..b6c553cafd 100644 --- a/services/web/frontend/js/features/source-editor/languages/bibtex/linting.ts +++ b/services/web/frontend/js/features/source-editor/languages/bibtex/linting.ts @@ -1,11 +1,12 @@ import { syntaxTree } from '@codemirror/language' import { Diagnostic, LintSource } from '@codemirror/lint' import { - Declaration, - EntryName, - EntryTypeName, + Entry, + EntryCommand, + EntryBody, + EntryType, FieldName, - Other, + Comment, } from '../../lezer-bibtex/bibtex.terms.mjs' import { SyntaxNodeRef } from '@lezer/common' import { EditorState } from '@codemirror/state' @@ -36,7 +37,7 @@ export const bibtexLintSource: LintSource = view => { if (fileLintingDisabled) { return false } - if (node.type.is(Other)) { + if (node.type.is(Comment)) { // Content between declaration. Can be linter directive const content = view.state.sliceDoc(node.from, node.to).trim() if (content === '%%novalidate') { @@ -50,7 +51,7 @@ export const bibtexLintSource: LintSource = view => { if (lintingCurrentlyDisabled) { return false } - if (node.type.is(Declaration)) { + if (node.type.is(Entry)) { diagnostics.push(...checkRequiredFields(node, view.state)) return false } @@ -190,25 +191,29 @@ const checkRequiredFields = ( // syntax tree const node = nodeRef.node - const entryNameNode = node.getChild(EntryName) - if (!entryNameNode) { + const entryCommandNode = node.getChild(EntryCommand) + if (!entryCommandNode) { return [] } - const entryTypeNameNode = entryNameNode.getChild(EntryTypeName) - if (!entryTypeNameNode) { + const entryTypeNode = entryCommandNode.getChild(EntryType) + if (!entryTypeNode) { return [] } - const entryTypeName = state - .sliceDoc(entryTypeNameNode.from, entryTypeNameNode.to) + const entryType = state + .sliceDoc(entryTypeNode.from, entryTypeNode.to) .toLowerCase() - const environment = bibEntryValidationRules.get(entryTypeName) + const environment = bibEntryValidationRules.get(entryType) if (!environment) { return [] } const requiredFields = environment.requiredAttributes - const actualFieldNodes = node.getChildren('Field') + const entryBodyNode = node.getChild(EntryBody) + if (!entryBodyNode) { + return [] + } + const actualFieldNodes = entryBodyNode.getChildren('Field') const actualFieldNames = new Set( actualFieldNodes .map(fieldNode => fieldNode.getChild(FieldName)) @@ -249,9 +254,9 @@ const checkRequiredFields = ( return [ { - from: entryNameNode.from, - to: entryNameNode.to, - message: createErrorMessage(missingFields, entryTypeName, state), + from: entryCommandNode.from, + to: entryCommandNode.to, + message: createErrorMessage(missingFields, entryType, state), severity: 'warning', }, ] diff --git a/services/web/frontend/js/features/source-editor/lezer-bibtex/bibtex.grammar b/services/web/frontend/js/features/source-editor/lezer-bibtex/bibtex.grammar index cee791bd3e..74b8d3892d 100644 --- a/services/web/frontend/js/features/source-editor/lezer-bibtex/bibtex.grammar +++ b/services/web/frontend/js/features/source-editor/lezer-bibtex/bibtex.grammar @@ -1,100 +1,141 @@ -@top Bibliography { - (Other | Declaration | CommentDeclaration | PreambleDeclaration | StringDeclaration)* -} - @tokens { - Other { ![@ \t\n] ![@]* } whiteSpace { $[\t\n ]+ } - Identifier { ![,{} \t\n%]+ } - StringName { $[a-zA-Z:_] $[a-zA-Z:_0-9-]* } - FieldName {$[a-zA-Z-_0-9+]+} - LiteralString { - '"' (!["] | "\\" _)* '"'? + + // Anything outside of an entry at the top level is a BibTeX free-form comment + Junk { ![@ \t\n%] ![@]* } + + // BibLaTeX supports comments starting with a % and extending to the end of + // the line + Comment { "%" ![\n]* } + + // See https://metacpan.org/release/AMBS/Text-BibTeX-0.91/view/btparse/doc/bt_language.pod + identifierSymbol { $[!$&*+./:;<>?^`_|] | "[" | "]" | "-" } + identifier { + (@asciiLetter | identifierSymbol) + (@asciiLetter | @digit | identifierSymbol)* } - EntryTypeName { $[a-zA-Z]+ } - Number { @digit+ } - StringKeyword {"@"$[Ss]$[Tt]$[Rr]$[Ii]$[Nn]$[Gg]} - PreambleKeyword {"@"$[Pp]$[Rr]$[Ee]$[Aa]$[Mm]$[Bb]$[Ll]$[Ee]} - CommentKeyword {"@"$[Cc]$[Oo]$[Mm]$[Mm]$[Ee]$[Nn]$[Tt]} - CommentContents { ![@} \t\n] ![}@]* } - "{"[closedBy="}"] - "}"[openedBy="{"] - "@" "\"" "," "#" "@string" + + EntryType { identifier } + + // Citation keys can start with a digit + CitationKey { (@asciiLetter | @digit | identifierSymbol)+ } + + NumberLiteral { @digit+ } + + "#" "=" } -@skip { whiteSpace } - -StringDeclaration { - StringKeyword "{" - Field* - "}" +@external specialize {EntryType} specializeEntryType from "./tokens.mjs" { + stringKeyword, + preambleKeyword, + commentKeyword } -PreambleDeclaration { - PreambleKeyword "{" - Expression - "}" +@skip { whiteSpace | Comment } + +@top Bibliography { + (Junk | Entry | CommentEntry | PreambleEntry | StringEntry)* } -CommentDeclaration { - CommentKeyword "{" - CommentContents* - "}" +StringEntry { StringCommand StringBody } +PreambleEntry { PreambleCommand PreambleBody } +CommentEntry { CommentCommand CommentBody } +Entry { EntryCommand EntryBody } + +EntryCommand { "@" EntryType } +EntryBody { commandBody } +StringCommand { "@" stringKeyword } +StringBody { commandBody } +PreambleCommand { "@" preambleKeyword } +PreambleBody { commandBody } +CommentCommand { "@" commentKeyword } + +commandBody { + ( + BodyOpen[closedBy="BodyClose"] { "{" } + contents + BodyClose[openedBy="BodyOpen"] { "}" } + ) | ( + BodyOpen[closedBy="BodyClose"] { "(" } + contents + BodyClose[openedBy="BodyOpen"] { ")" } + ) } -EntryName { - "@" EntryTypeName -} +entryContents { CitationKey ("," fields)? } -Declaration { - EntryName "{" - Identifier - - fieldEntry { - ("," Field ) - }* - ("," )? - "}" -} +fields { (Field ("," Field)* ","?)? } -Field { - Name "=" Expression -} - -Expression { - BracedString | - Number | - StringConcatenation -} +Field { FieldName "=" Value } +FieldName { identifier } +Value { simpleValue ("#" simpleValue)* } +simpleValue { StringLiteral | NumberLiteral | StringName } +StringName { identifier } @local tokens { - OpenBracedContents[closedBy="}", @name="{"] {"{"} - CloseBracedContents[openedBy="{", @name="}"] {"}"} - @else nonClosingBracedContents + strOpenBrace { "{" } + strCloseBrace { "}" } + strOpenParen { "(" } + strCloseParen { ")" } + strQuote { "\"" } + @else strOther } -@skip {}{ - BracedStringContents { - ( - nonClosingBracedContents | - nestedBracedString { - OpenBracedContents - BracedStringContents - CloseBracedContents - } +@skip {} { + StringLiteral { + ( + StringOpen[closedBy="StringClose"] { "\"" } + StringContents { quotedStringContents } + StringClose[openedBy="StringOpen"] { strQuote } + ) | ( + StringOpen[closedBy="StringClose"] { "{" } + StringContents { bracedStringContents } + StringClose[openedBy="StringOpen"] { strCloseBrace } + ) + } + + CommentBody { + ( + BodyOpen[closedBy="BodyClose"] { "{" } + CommentContents { bracedStringContents } + BodyClose[openedBy="BodyOpen"] { strCloseBrace } + ) | ( + BodyOpen[closedBy="BodyClose"] { "(" } + CommentContents { parenthesizedStringContents } + BodyClose[openedBy="BodyOpen"] { strCloseParen } + ) + } + + quotedStringContents { + ( + strOpenBrace bracedStringContents strCloseBrace | + strOpenParen | + strCloseParen | + strCloseBrace | + strOther )* -} - BracedString { - "{" BracedStringContents CloseBracedContents - } + } + + parenthesizedStringContents { + ( + strOpenBrace bracedStringContents strCloseBrace | + strQuote | + strOpenParen | + strCloseBrace | + strOther + )* + + } + + bracedStringContents { + ( + strOpenBrace bracedStringContents strCloseBrace | + strQuote | + strOpenParen | + strCloseParen | + strOther + )* + } } -@precedence { concatenation @left } - -StringConcatenation { - StringConcatenation !concatenation "#" StringConcatenation | - LiteralString | - StringName -} - -@external propSource highlighting from "./highlight.mjs" \ No newline at end of file +@external propSource highlighting from "./highlight.mjs" diff --git a/services/web/frontend/js/features/source-editor/lezer-bibtex/highlight.mjs b/services/web/frontend/js/features/source-editor/lezer-bibtex/highlight.mjs index 1f611bd86e..b05eac3140 100644 --- a/services/web/frontend/js/features/source-editor/lezer-bibtex/highlight.mjs +++ b/services/web/frontend/js/features/source-editor/lezer-bibtex/highlight.mjs @@ -1,18 +1,17 @@ import { styleTags, tags as t } from '@lezer/highlight' export const highlighting = styleTags({ - LiteralString: t.string, - 'BracedString/...': t.string, - Number: t.number, - Identifier: t.name, - 'EntryName/...': t.keyword, - FieldName: t.attributeName, - Expression: t.attributeValue, - '#': t.operator, - StringKeyword: t.keyword, - PreambleKeyword: t.keyword, - CommentKeyword: t.keyword, - CommentContents: t.comment, + 'EntryCommand/...': t.keyword, + 'StringCommand/...': t.keyword, + 'PreambleCommand/...': t.keyword, + 'CommentCommand/...': t.keyword, + FieldName: t.name, + CitationKey: t.name, + 'StringLiteral/...': t.string, + NumberLiteral: t.number, StringName: t.variableName, - Other: t.comment, + '#': t.operator, + Comment: t.comment, + 'CommentBody/...': t.comment, + Junk: t.comment, }) diff --git a/services/web/frontend/js/features/source-editor/lezer-bibtex/tokens.mjs b/services/web/frontend/js/features/source-editor/lezer-bibtex/tokens.mjs new file mode 100644 index 0000000000..90f55da76a --- /dev/null +++ b/services/web/frontend/js/features/source-editor/lezer-bibtex/tokens.mjs @@ -0,0 +1,21 @@ +import { + stringKeyword, + preambleKeyword, + commentKeyword, +} from './bibtex.terms.mjs' + +/** + * @param {string} identifier + */ +export function specializeEntryType(identifier) { + const lowercased = identifier.toLowerCase() + switch (lowercased) { + case 'string': + return stringKeyword + case 'preamble': + return preambleKeyword + case 'comment': + return commentKeyword + } + return -1 +}