Merge pull request #31239 from overleaf/em-bibtex-grammar

Improvements to the lezer grammar for BibTeX

GitOrigin-RevId: 33ece6f3c6a34380aa7b2a46ff624aff3ccf8a10
This commit is contained in:
Eric Mc Sween
2026-02-04 08:20:51 -05:00
committed by Copybot
parent ee6047ccd6
commit d9cf720566
5 changed files with 189 additions and 112 deletions

View File

@@ -1,10 +1,21 @@
import { LRLanguage } from '@codemirror/language'
import { indentNodeProp, LRLanguage } from '@codemirror/language'
import { parser } from '../../lezer-bibtex/bibtex.mjs'
import { bibtexEntryCompletions } from './completions/snippets'
export const BibTeXLanguage = LRLanguage.define({
name: 'bibtex',
parser,
parser: parser.configure({
props: [
// Disable the autoindent from delimited nodes
indentNodeProp.add({
EntryBody: () => null,
StringBody: () => null,
PreambleBody: () => null,
CommentBody: () => null,
StringLiteral: () => null,
}),
],
}),
languageData: {
autocomplete: bibtexEntryCompletions,
},

View File

@@ -1,11 +1,12 @@
import { syntaxTree } from '@codemirror/language'
import { Diagnostic, LintSource } from '@codemirror/lint'
import {
Declaration,
EntryName,
EntryTypeName,
Entry,
EntryCommand,
EntryBody,
EntryType,
FieldName,
Other,
Comment,
} from '../../lezer-bibtex/bibtex.terms.mjs'
import { SyntaxNodeRef } from '@lezer/common'
import { EditorState } from '@codemirror/state'
@@ -36,7 +37,7 @@ export const bibtexLintSource: LintSource = view => {
if (fileLintingDisabled) {
return false
}
if (node.type.is(Other)) {
if (node.type.is(Comment)) {
// Content between declarations. It can be a linter directive
const content = view.state.sliceDoc(node.from, node.to).trim()
if (content === '%%novalidate') {
@@ -50,7 +51,7 @@ export const bibtexLintSource: LintSource = view => {
if (lintingCurrentlyDisabled) {
return false
}
if (node.type.is(Declaration)) {
if (node.type.is(Entry)) {
diagnostics.push(...checkRequiredFields(node, view.state))
return false
}
@@ -190,25 +191,29 @@ const checkRequiredFields = (
// syntax tree
const node = nodeRef.node
const entryNameNode = node.getChild(EntryName)
if (!entryNameNode) {
const entryCommandNode = node.getChild(EntryCommand)
if (!entryCommandNode) {
return []
}
const entryTypeNameNode = entryNameNode.getChild(EntryTypeName)
if (!entryTypeNameNode) {
const entryTypeNode = entryCommandNode.getChild(EntryType)
if (!entryTypeNode) {
return []
}
const entryTypeName = state
.sliceDoc(entryTypeNameNode.from, entryTypeNameNode.to)
const entryType = state
.sliceDoc(entryTypeNode.from, entryTypeNode.to)
.toLowerCase()
const environment = bibEntryValidationRules.get(entryTypeName)
const environment = bibEntryValidationRules.get(entryType)
if (!environment) {
return []
}
const requiredFields = environment.requiredAttributes
const actualFieldNodes = node.getChildren('Field')
const entryBodyNode = node.getChild(EntryBody)
if (!entryBodyNode) {
return []
}
const actualFieldNodes = entryBodyNode.getChildren('Field')
const actualFieldNames = new Set(
actualFieldNodes
.map(fieldNode => fieldNode.getChild(FieldName))
@@ -249,9 +254,9 @@ const checkRequiredFields = (
return [
{
from: entryNameNode.from,
to: entryNameNode.to,
message: createErrorMessage(missingFields, entryTypeName, state),
from: entryCommandNode.from,
to: entryCommandNode.to,
message: createErrorMessage(missingFields, entryType, state),
severity: 'warning',
},
]

View File

@@ -1,100 +1,141 @@
@top Bibliography {
(Other | Declaration | CommentDeclaration | PreambleDeclaration | StringDeclaration)*
}
@tokens {
Other { ![@ \t\n] ![@]* }
whiteSpace { $[\t\n ]+ }
Identifier { ![,{} \t\n%]+ }
StringName { $[a-zA-Z:_] $[a-zA-Z:_0-9-]* }
FieldName {$[a-zA-Z-_0-9+]+}
LiteralString {
'"' (!["] | "\\" _)* '"'?
// Anything outside of an entry at the top level is a BibTeX free-form comment
Junk { ![@ \t\n%] ![@]* }
// BibLaTeX supports comments starting with a % and extending to the end of
// the line
Comment { "%" ![\n]* }
// See https://metacpan.org/release/AMBS/Text-BibTeX-0.91/view/btparse/doc/bt_language.pod
identifierSymbol { $[!$&*+./:;<>?^`_|] | "[" | "]" | "-" }
identifier {
(@asciiLetter | identifierSymbol)
(@asciiLetter | @digit | identifierSymbol)*
}
EntryTypeName { $[a-zA-Z]+ }
Number { @digit+ }
StringKeyword {"@"$[Ss]$[Tt]$[Rr]$[Ii]$[Nn]$[Gg]}
PreambleKeyword {"@"$[Pp]$[Rr]$[Ee]$[Aa]$[Mm]$[Bb]$[Ll]$[Ee]}
CommentKeyword {"@"$[Cc]$[Oo]$[Mm]$[Mm]$[Ee]$[Nn]$[Tt]}
CommentContents { ![@} \t\n] ![}@]* }
"{"[closedBy="}"]
"}"[openedBy="{"]
"@" "\"" "," "#" "@string"
EntryType { identifier }
// Citation keys can start with a digit
CitationKey { (@asciiLetter | @digit | identifierSymbol)+ }
NumberLiteral { @digit+ }
"#" "="
}
@skip { whiteSpace }
StringDeclaration {
StringKeyword "{"
Field<StringName>*
"}"
@external specialize {EntryType} specializeEntryType from "./tokens.mjs" {
stringKeyword,
preambleKeyword,
commentKeyword
}
PreambleDeclaration {
PreambleKeyword "{"
Expression
"}"
@skip { whiteSpace | Comment }
@top Bibliography {
(Junk | Entry | CommentEntry | PreambleEntry | StringEntry)*
}
CommentDeclaration {
CommentKeyword "{"
CommentContents*
"}"
StringEntry { StringCommand StringBody }
PreambleEntry { PreambleCommand PreambleBody }
CommentEntry { CommentCommand CommentBody }
Entry { EntryCommand EntryBody }
EntryCommand { "@" EntryType }
EntryBody { commandBody<entryContents> }
StringCommand { "@" stringKeyword }
StringBody { commandBody<fields> }
PreambleCommand { "@" preambleKeyword }
PreambleBody { commandBody<Value> }
CommentCommand { "@" commentKeyword }
commandBody<contents> {
(
BodyOpen[closedBy="BodyClose"] { "{" }
contents
BodyClose[openedBy="BodyOpen"] { "}" }
) | (
BodyOpen[closedBy="BodyClose"] { "(" }
contents
BodyClose[openedBy="BodyOpen"] { ")" }
)
}
EntryName {
"@" EntryTypeName
}
entryContents { CitationKey ("," fields)? }
Declaration {
EntryName "{"
Identifier
fieldEntry {
("," Field<FieldName> )
}*
("," )?
"}"
}
fields { (Field ("," Field)* ","?)? }
Field<Name> {
Name "=" Expression
}
Expression {
BracedString |
Number |
StringConcatenation
}
Field { FieldName "=" Value }
FieldName { identifier }
Value { simpleValue ("#" simpleValue)* }
simpleValue { StringLiteral | NumberLiteral | StringName }
StringName { identifier }
@local tokens {
OpenBracedContents[closedBy="}", @name="{"] {"{"}
CloseBracedContents[openedBy="{", @name="}"] {"}"}
@else nonClosingBracedContents
strOpenBrace { "{" }
strCloseBrace { "}" }
strOpenParen { "(" }
strCloseParen { ")" }
strQuote { "\"" }
@else strOther
}
@skip {}{
BracedStringContents {
(
nonClosingBracedContents |
nestedBracedString {
OpenBracedContents
BracedStringContents
CloseBracedContents
}
@skip {} {
StringLiteral {
(
StringOpen[closedBy="StringClose"] { "\"" }
StringContents { quotedStringContents }
StringClose[openedBy="StringOpen"] { strQuote }
) | (
StringOpen[closedBy="StringClose"] { "{" }
StringContents { bracedStringContents }
StringClose[openedBy="StringOpen"] { strCloseBrace }
)
}
CommentBody {
(
BodyOpen[closedBy="BodyClose"] { "{" }
CommentContents { bracedStringContents }
BodyClose[openedBy="BodyOpen"] { strCloseBrace }
) | (
BodyOpen[closedBy="BodyClose"] { "(" }
CommentContents { parenthesizedStringContents }
BodyClose[openedBy="BodyOpen"] { strCloseParen }
)
}
quotedStringContents {
(
strOpenBrace bracedStringContents strCloseBrace |
strOpenParen |
strCloseParen |
strCloseBrace |
strOther
)*
}
BracedString {
"{" BracedStringContents CloseBracedContents
}
}
parenthesizedStringContents {
(
strOpenBrace bracedStringContents strCloseBrace |
strQuote |
strOpenParen |
strCloseBrace |
strOther
)*
}
bracedStringContents {
(
strOpenBrace bracedStringContents strCloseBrace |
strQuote |
strOpenParen |
strCloseParen |
strOther
)*
}
}
@precedence { concatenation @left }
StringConcatenation {
StringConcatenation !concatenation "#" StringConcatenation |
LiteralString |
StringName
}
@external propSource highlighting from "./highlight.mjs"
@external propSource highlighting from "./highlight.mjs"

View File

@@ -1,18 +1,17 @@
import { styleTags, tags as t } from '@lezer/highlight'
export const highlighting = styleTags({
LiteralString: t.string,
'BracedString/...': t.string,
Number: t.number,
Identifier: t.name,
'EntryName/...': t.keyword,
FieldName: t.attributeName,
Expression: t.attributeValue,
'#': t.operator,
StringKeyword: t.keyword,
PreambleKeyword: t.keyword,
CommentKeyword: t.keyword,
CommentContents: t.comment,
'EntryCommand/...': t.keyword,
'StringCommand/...': t.keyword,
'PreambleCommand/...': t.keyword,
'CommentCommand/...': t.keyword,
FieldName: t.name,
CitationKey: t.name,
'StringLiteral/...': t.string,
NumberLiteral: t.number,
StringName: t.variableName,
Other: t.comment,
'#': t.operator,
Comment: t.comment,
'CommentBody/...': t.comment,
Junk: t.comment,
})

View File

@@ -0,0 +1,21 @@
import {
stringKeyword,
preambleKeyword,
commentKeyword,
} from './bibtex.terms.mjs'
/**
 * External specializer for `EntryType` tokens: recognises the reserved entry
 * types regardless of letter case.
 *
 * @param {string} identifier - text of the entry type token
 * @returns {number} the term id for `string`, `preamble` or `comment`, or -1
 *   for any other text
 */
export function specializeEntryType(identifier) {
  const name = identifier.toLowerCase()
  if (name === 'string') {
    return stringKeyword
  }
  if (name === 'preamble') {
    return preambleKeyword
  }
  if (name === 'comment') {
    return commentKeyword
  }
  // Not a reserved word: leave the token unspecialized
  return -1
}