Merge pull request #28128 from overleaf/mj-client-word-count-magic-comments

[web] Support some texcount magic comments in client side word count

GitOrigin-RevId: 12749876edbf7940ca9adfc63fcc7e787894e727
This commit is contained in:
Mathias Jakobsen
2025-09-02 12:35:07 +01:00
committed by Copybot
parent 1625cc7848
commit 8282aa85f2
3 changed files with 79 additions and 3 deletions

View File

@@ -139,6 +139,10 @@ export const countWordsInFile = (
const headMatcher = NodeType.match<
(nodeRef: SyntaxNodeRef) => boolean | void
>({
Comment(nodeRef) {
handleComment(nodeRef)
return false
},
Title(nodeRef) {
data.headers++
iterateNode(nodeRef, 'header')
@@ -149,6 +153,10 @@ export const countWordsInFile = (
const bodyMatcher = NodeType.match<
(nodeRef: SyntaxNodeRef) => boolean | void
>({
Comment(nodeRef) {
handleComment(nodeRef)
return false
},
Normal(nodeRef) {
textNodes.push({
from: nodeRef.from,
@@ -265,17 +273,48 @@ export const countWordsInFile = (
const preambleExtent = findPreambleExtent(tree)
const state = {
skipping: false,
}
// Matches a TeXcount magic comment (e.g. `%TC:ignore`, `%%TC: endignore`) at
// the start of a comment and captures the instruction name in group 1.
const TC_REGEX = /^%+TC:\s*(\w+)\s*/i
/**
 * Looks for a TeXcount instruction in the given comment node and updates
 * `state.skipping` accordingly: `ignore` switches skipping on, `endignore`
 * switches it off. Ordinary comments and any other instruction leave the
 * state unchanged.
 *
 * @param nodeRef - the comment node; its `from`/`to` offsets are used to
 *   slice the comment text out of `content`.
 */
const handleComment = (nodeRef: SyntaxNodeRef) => {
  const tcMatch = TC_REGEX.exec(content.slice(nodeRef.from, nodeRef.to))
  if (!tcMatch) {
    // not a TeXcount instruction
    return
  }
  // instruction names are compared case-insensitively
  const instruction = tcMatch[1].toLowerCase()
  if (instruction === 'ignore') {
    state.skipping = true
  } else if (instruction === 'endignore') {
    state.skipping = false
  }
}
// Walk the tree from offset 0 up to preambleExtent.to, dispatching each node
// to headMatcher.
tree.iterate({
from: 0,
to: preambleExtent.to,
// NOTE(review): two consecutive enter(...) headers follow; this looks like the
// old and new lines of the diff rendered together — confirm against the real file.
enter(nodeRef) {
enter(nodeRef: SyntaxNodeRef) {
// While state.skipping is set, every non-Comment node returns false (per
// Lezer's Tree.iterate this stops descent into the node's children). Comment
// nodes still reach the matcher so handleComment can see a %TC:endignore.
if (state.skipping && !nodeRef.type.is('Comment')) {
return false
}
// No handler registered for this node type => undefined is returned.
return headMatcher(nodeRef.type)?.(nodeRef)
},
})
// Walk the tree from preambleExtent.to onward (no `to` bound is given),
// dispatching each node to bodyMatcher. The same `state` object is consulted
// here, so its skipping flag is whatever earlier comment handling left it as.
tree.iterate({
from: preambleExtent.to,
// NOTE(review): two consecutive enter(...) headers follow; this looks like the
// old and new lines of the diff rendered together — confirm against the real file.
enter(nodeRef) {
enter(nodeRef: SyntaxNodeRef) {
// While state.skipping is set, every non-Comment node returns false (per
// Lezer's Tree.iterate this stops descent into the node's children). Comment
// nodes still reach the matcher so handleComment can see a %TC:endignore.
if (state.skipping && !nodeRef.type.is('Comment')) {
return false
}
// No handler registered for this node type => undefined is returned.
return bodyMatcher(nodeRef.type)?.(nodeRef)
},
})

View File

@@ -5,7 +5,7 @@ import { WordCountData } from '@/features/word-count-modal/components/word-count
import { createSegmenters } from '@/features/word-count-modal/utils/segmenters'
import { expect } from 'chai'
describe('word count', function () {
describe('word-count', function () {
beforeEach(async function () {
this.data = {
encode: '',
@@ -35,6 +35,10 @@ describe('word count', function () {
path.join(__dirname, 'word-count.tex'),
'utf-8'
),
'word-count-with-ignored-sections.tex': await readFile(
path.join(__dirname, 'word-count-with-ignored-sections.tex'),
'utf-8'
),
}
this.projectSnapshot = {
@@ -69,4 +73,28 @@ describe('word count', function () {
textWords: 42,
})
})
it('skips ignored sections', function () {
  // Every category except body text is expected to stay at zero; the text
  // totals of 3 words / 10 characters correspond to three short words being
  // counted while the material between the ignore/endignore comments is not.
  const expectedCounts = {
    abstractCharacters: 0,
    abstractWords: 0,
    captionCharacters: 0,
    captionWords: 0,
    footnoteCharacters: 0,
    footnoteWords: 0,
    headCharacters: 0,
    headWords: 0,
    otherCharacters: 0,
    otherWords: 0,
    textCharacters: 10,
    textWords: 3,
  }

  // countWordsInFile accumulates its results into this.data
  countWordsInFile(
    this.data,
    this.projectSnapshot,
    'word-count-with-ignored-sections.tex',
    this.segmenters
  )

  expect(this.data).to.deep.include(expectedCounts)
})
})

View File

@@ -0,0 +1,9 @@
foo
%TC:ignore
bar
%TC:endignore
baz
%TC: ignore
qux
%%TC: endignore
quux