From 8282aa85f23e7580d28adf2b65eae8023b606064 Mon Sep 17 00:00:00 2001 From: Mathias Jakobsen Date: Tue, 2 Sep 2025 12:35:07 +0100 Subject: [PATCH] Merge pull request #28128 from overleaf/mj-client-word-count-magic-comments [web] Support some texcount magic comments in client side word count GitOrigin-RevId: 12749876edbf7940ca9adfc63fcc7e787894e727 --- .../utils/count-words-in-file.ts | 43 ++++++++++++++++++- .../utils/count-words-in-file.test.ts | 30 ++++++++++++- .../word-count-with-ignored-sections.tex | 9 ++++ 3 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 services/web/test/frontend/features/word-count-modal/utils/word-count-with-ignored-sections.tex diff --git a/services/web/frontend/js/features/word-count-modal/utils/count-words-in-file.ts b/services/web/frontend/js/features/word-count-modal/utils/count-words-in-file.ts index 8187bb39a2..a917d54014 100644 --- a/services/web/frontend/js/features/word-count-modal/utils/count-words-in-file.ts +++ b/services/web/frontend/js/features/word-count-modal/utils/count-words-in-file.ts @@ -139,6 +139,10 @@ export const countWordsInFile = ( const headMatcher = NodeType.match< (nodeRef: SyntaxNodeRef) => boolean | void >({ + Comment(nodeRef) { + handleComment(nodeRef) + return false + }, Title(nodeRef) { data.headers++ iterateNode(nodeRef, 'header') @@ -149,6 +153,10 @@ export const countWordsInFile = ( const bodyMatcher = NodeType.match< (nodeRef: SyntaxNodeRef) => boolean | void >({ + Comment(nodeRef) { + handleComment(nodeRef) + return false + }, Normal(nodeRef) { textNodes.push({ from: nodeRef.from, @@ -265,17 +273,48 @@ export const countWordsInFile = ( const preambleExtent = findPreambleExtent(tree) + const state = { + skipping: false, + } + + const TC_REGEX = /^%+TC:\s*(\w+)\s*/i + + const handleComment = (nodeRef: SyntaxNodeRef) => { + const comment = content.slice(nodeRef.from, nodeRef.to) + + // look for TeXcount instructions + const match = TC_REGEX.exec(comment) + if (match) { + switch (match[1].toLowerCase()) { + case 'ignore': + state.skipping = true + break + case 'endignore': + state.skipping = false + break + default: + break + } + } + } + tree.iterate({ from: 0, to: preambleExtent.to, - enter(nodeRef) { + enter(nodeRef: SyntaxNodeRef) { + if (state.skipping && !nodeRef.type.is('Comment')) { + return false + } return headMatcher(nodeRef.type)?.(nodeRef) }, }) tree.iterate({ from: preambleExtent.to, - enter(nodeRef) { + enter(nodeRef: SyntaxNodeRef) { + if (state.skipping && !nodeRef.type.is('Comment')) { + return false + } return bodyMatcher(nodeRef.type)?.(nodeRef) }, }) diff --git a/services/web/test/frontend/features/word-count-modal/utils/count-words-in-file.test.ts b/services/web/test/frontend/features/word-count-modal/utils/count-words-in-file.test.ts index 32c7b72c7d..1a360e6386 100644 --- a/services/web/test/frontend/features/word-count-modal/utils/count-words-in-file.test.ts +++ b/services/web/test/frontend/features/word-count-modal/utils/count-words-in-file.test.ts @@ -5,7 +5,7 @@ import { WordCountData } from '@/features/word-count-modal/components/word-count import { createSegmenters } from '@/features/word-count-modal/utils/segmenters' import { expect } from 'chai' -describe('word count', function () { +describe('word-count', function () { beforeEach(async function () { this.data = { encode: '', @@ -35,6 +35,10 @@ describe('word count', function () { path.join(__dirname, 'word-count.tex'), 'utf-8' ), + 'word-count-with-ignored-sections.tex': await readFile( + path.join(__dirname, 'word-count-with-ignored-sections.tex'), + 'utf-8' + ), } this.projectSnapshot = { @@ -69,4 +73,28 @@ describe('word count', function () { textWords: 42, }) }) + + it('skips ignored sections', function () { + countWordsInFile( + this.data, + this.projectSnapshot, + 'word-count-with-ignored-sections.tex', + this.segmenters + ) + + expect(this.data).to.deep.include({ + abstractCharacters: 0, + abstractWords: 0, + captionCharacters: 0, + captionWords: 0, + footnoteCharacters: 0, + footnoteWords: 0, + headCharacters: 0, + headWords: 0, + otherCharacters: 0, + otherWords: 0, + textCharacters: 10, + textWords: 3, + }) + }) }) diff --git a/services/web/test/frontend/features/word-count-modal/utils/word-count-with-ignored-sections.tex b/services/web/test/frontend/features/word-count-modal/utils/word-count-with-ignored-sections.tex new file mode 100644 index 0000000000..4324fd5cea --- /dev/null +++ b/services/web/test/frontend/features/word-count-modal/utils/word-count-with-ignored-sections.tex @@ -0,0 +1,9 @@ +foo +%TC:ignore +bar +%TC:endignore +baz +%TC: ignore +qux +%%TC: endignore +quux \ No newline at end of file