Add more detail to word count UI (#25400)

GitOrigin-RevId: 3521f2ea03332e46ef1bac634ce0650cdce01249
This commit is contained in:
Alf Eaton
2025-05-13 13:36:04 +01:00
committed by Copybot
parent bd67b4ca13
commit fb0cfbe0bb
11 changed files with 474 additions and 117 deletions

View File

@@ -32,6 +32,7 @@
"about_to_leave_projects": "",
"about_to_trash_projects": "",
"about_writefull": "",
"abstract": "",
"accept_and_continue": "",
"accept_change": "",
"accept_change_error_description": "",
@@ -209,6 +210,7 @@
"cant_see_what_youre_looking_for_question": "",
"caption_above": "",
"caption_below": "",
"captions": "",
"card_details": "",
"card_details_are_not_valid": "",
"card_must_be_authenticated_by_3dsecure": "",
@@ -588,6 +590,7 @@
"footer_about_us": "",
"footer_contact_us": "",
"footer_navigation": "",
"footnotes": "",
"for_enterprise": "",
"for_government": "",
"for_individuals_and_groups": "",
@@ -1723,6 +1726,7 @@
"test_configuration_successful": "",
"tex_live_version": "",
"texgpt": "",
"text": "",
"thank_you": "",
"thank_you_exclamation": "",
"thank_you_for_your_feedback": "",
@@ -1860,6 +1864,7 @@
"tooltip_show_filetree": "",
"tooltip_show_panel": "",
"tooltip_show_pdf": "",
"total": "",
"total_due_in_x_days": "",
"total_due_today": "",
"total_per_month": "",

View File

@@ -11,8 +11,8 @@ import { debugConsole } from '@/utils/debugging'
import { signalWithTimeout } from '@/utils/abort-signal'
import { isMainFile } from '@/features/pdf-preview/util/editor-files'
import { countWordsInFile } from '@/features/word-count-modal/utils/count-words-in-file'
import { WordCounts } from '@/features/word-count-modal/components/word-counts'
import { createSegmenters } from '@/features/word-count-modal/utils/segmenters'
import { WordCountsClient } from './word-counts-client'
export const WordCountClient: FC = () => {
const [loading, setLoading] = useState(true)
@@ -59,7 +59,8 @@ export const WordCountClient: FC = () => {
footnoteWords: 0,
footnoteCharacters: 0,
outside: 0,
outsideCharacters: 0,
otherWords: 0,
otherCharacters: 0,
headers: 0,
elements: 0,
mathInline: 0,
@@ -99,7 +100,7 @@ export const WordCountClient: FC = () => {
<>
{loading && !error && <WordCountLoading />}
{error && <WordCountError />}
{data && <WordCounts data={data} source="client" />}
{data && <WordCountsClient data={data} />}
</>
)
}

View File

@@ -20,6 +20,6 @@ export type WordCountData = ServerWordCountData & {
footnoteCharacters: number
abstractWords: number
abstractCharacters: number
// outsideWords: number
outsideCharacters: number
otherWords: number
otherCharacters: number
}

View File

@@ -42,7 +42,7 @@ export const WordCountServer: FC = () => {
<>
{loading && !error && <WordCountLoading />}
{error && <WordCountError />}
{data && <WordCounts data={data} source="server" />}
{data && <WordCounts data={data} />}
</>
)
}

View File

@@ -0,0 +1,138 @@
import { FC, useMemo } from 'react'
import { WordCountData } from '@/features/word-count-modal/components/word-count-data'
import { useTranslation } from 'react-i18next'
import { Container, Row, Col, Form } from 'react-bootstrap-5'
import OLNotification from '@/features/ui/components/ol/ol-notification'
import usePersistedState from '@/shared/hooks/use-persisted-state'
/**
 * Renders the client-side word count as one row per category (text, headers,
 * abstract, captions, footnotes, other), each with a checkbox that controls
 * whether the category contributes to the total shown in the last row.
 *
 * The list of included category keys is held by `usePersistedState` under the
 * key 'word-count-total' and defaults to only the 'text' category.
 *
 * @param data - word and character counters for each category, plus any
 *   messages to surface in an error notification above the rows
 */
export const WordCountsClient: FC<{ data: WordCountData }> = ({ data }) => {
  const { t } = useTranslation()

  const [included, setIncluded] = usePersistedState<string[]>(
    'word-count-total',
    ['text']
  )

  // One entry per displayed category; `key` is also the value stored in
  // `included`, so these keys must stay stable.
  const items = useMemo(() => {
    return [
      {
        key: 'text',
        label: t('text'),
        words: data.textWords,
        chars: data.textCharacters,
      },
      {
        key: 'headers',
        label: t('headers'),
        words: data.headWords,
        chars: data.headCharacters,
      },
      {
        key: 'abstract',
        label: t('abstract'),
        words: data.abstractWords,
        chars: data.abstractCharacters,
      },
      {
        key: 'captions',
        label: t('captions'),
        words: data.captionWords,
        chars: data.captionCharacters,
      },
      {
        key: 'footnotes',
        label: t('footnotes'),
        words: data.footnoteWords,
        chars: data.footnoteCharacters,
      },
      {
        key: 'other',
        label: t('other'),
        words: data.otherWords,
        chars: data.otherCharacters,
      },
    ]
  }, [data, t])

  // Sum only the categories whose checkbox is ticked. Iterating `items`
  // (rather than `included`) means unknown or repeated stored keys cannot
  // affect the result.
  const totals = useMemo(() => {
    const totals = {
      words: 0,
      chars: 0,
    }
    for (const item of items) {
      if (included.includes(item.key)) {
        totals.words += item.words
        totals.chars += item.chars
      }
    }
    return totals
  }, [included, items])

  return (
    <Container fluid>
      {data.messages && (
        <Row>
          <Col xs={12}>
            <OLNotification
              type="error"
              content={
                <p style={{ whiteSpace: 'pre-wrap' }}>{data.messages}</p>
              }
            />
          </Col>
        </Row>
      )}

      {items.map(item => (
        <Row
          key={item.key}
          style={{
            borderBottom: '1px solid #eee',
            padding: 5,
            marginBottom: 5,
          }}
        >
          <Col
            style={{
              display: 'flex',
              // 'top' is not a valid align-items value and was ignored by the
              // browser; 'flex-start' is the keyword that aligns to the top.
              alignItems: 'flex-start',
              justifyContent: 'space-between',
            }}
          >
            {/* NOTE(review): the aria-label and the "words"/"chars" suffixes
                below are hard-coded English and bypass t() — add locale keys. */}
            <Form.Check
              type="checkbox"
              id={`word-count-${item.key}`}
              label={item.label}
              checked={included.includes(item.key)}
              onChange={event =>
                setIncluded(prevValue => {
                  if (!event.target.checked) {
                    return prevValue.filter(key => key !== item.key)
                  }
                  // Keep the update idempotent: never store a key twice.
                  return prevValue.includes(item.key)
                    ? prevValue
                    : prevValue.concat(item.key)
                })
              }
              aria-label={`Include ${item.label} in total`}
            />
          </Col>
          <Col>
            {item.words} words
            <br />
            {item.chars} chars
          </Col>
        </Row>
      ))}

      <Row>
        <Col style={{ textAlign: 'right' }}>
          <span style={{ fontWeight: 'bold' }}>
            {t('total')}: {totals.words} words
            <br />
            {totals.chars} chars
          </span>
        </Col>
      </Row>
    </Container>
  )
}

View File

@@ -1,22 +1,12 @@
import {
ServerWordCountData,
WordCountData,
} from '@/features/word-count-modal/components/word-count-data'
import { useTranslation } from 'react-i18next'
import { FC } from 'react'
import { ServerWordCountData } from '@/features/word-count-modal/components/word-count-data'
import { useTranslation } from 'react-i18next'
import { Container, Row, Col } from 'react-bootstrap-5'
import OLNotification from '@/features/ui/components/ol/ol-notification'
export const WordCounts: FC<
| {
data: ServerWordCountData
source: 'server'
}
| {
data: WordCountData
source: 'client'
}
> = ({ data, source }) => {
export const WordCounts: FC<{
data: ServerWordCountData
}> = ({ data }) => {
const { t } = useTranslation()
return (
@@ -34,69 +24,32 @@ export const WordCounts: FC<
</Row>
)}
{source === 'client' ? (
<>
<Row>
<Col xs={4}>
<div className="pull-right">Text:</div>
</Col>
<Col xs={6}>{data.textWords}</Col>
</Row>
<Row>
<Col xs={4}>
<div className="pull-right">{t('total_words')}:</div>
</Col>
<Col xs={6}>{data.textWords}</Col>
</Row>
<Row>
<Col xs={4}>
<div className="pull-right">{t('headers')}:</div>
</Col>
<Col xs={6}>{data.headers}</Col>
</Row>
<Row>
<Col xs={4}>
<div className="pull-right">Headers:</div>
</Col>
<Col xs={6}>{data.headWords}</Col>
</Row>
<Row>
<Col xs={4}>
<div className="pull-right">{t('math_inline')}:</div>
</Col>
<Col xs={6}>{data.mathInline}</Col>
</Row>
<Row>
<Col xs={4}>
<div className="pull-right">Captions:</div>
</Col>
<Col xs={6}>{data.captionWords}</Col>
</Row>
<Row>
<Col xs={4}>
<div className="pull-right">Footnotes:</div>
</Col>
<Col xs={6}>{data.footnoteWords}</Col>
</Row>
</>
) : (
<Row>
<Col xs={4}>
<div className="pull-right">{t('total_words')}:</div>
</Col>
<Col xs={6}>{data.textWords}</Col>
</Row>
)}
{source === 'server' && (
<>
<Row>
<Col xs={4}>
<div className="pull-right">{t('headers')}:</div>
</Col>
<Col xs={6}>{data.headers}</Col>
</Row>
<Row>
<Col xs={4}>
<div className="pull-right">{t('math_inline')}:</div>
</Col>
<Col xs={6}>{data.mathInline}</Col>
</Row>
<Row>
<Col xs={4}>
<div className="pull-right">{t('math_display')}:</div>
</Col>
<Col xs={6}>{data.mathDisplay}</Col>
</Row>
</>
)}
<Row>
<Col xs={4}>
<div className="pull-right">{t('math_display')}:</div>
</Col>
<Col xs={6}>{data.mathDisplay}</Col>
</Row>
</Container>
)
}

View File

@@ -1,4 +1,3 @@
import { ProjectSnapshot } from '@/infrastructure/project-snapshot'
import { LaTeXLanguage } from '@/features/source-editor/languages/latex/latex-language'
import { WordCountData } from '@/features/word-count-modal/components/word-count-data'
import { NodeType, SyntaxNodeRef } from '@lezer/common'
@@ -8,7 +7,7 @@ import { Segmenters } from './segmenters'
const whiteSpaceRe = /^\s$/
type Context = 'text' | 'header' | 'abstract' | 'caption' | 'footnote'
type Context = 'text' | 'header' | 'abstract' | 'caption' | 'footnote' | 'other'
const counters: Record<
Context,
@@ -37,10 +36,15 @@ const counters: Record<
word: 'footnoteWords',
character: 'footnoteCharacters',
},
other: {
word: 'otherWords',
character: 'otherCharacters',
},
}
// https://en.wikibooks.org/wiki/LaTeX/Special_Characters#Escaped_codes
const replacementsMap: Map<string, string> = new Map([
// LaTeX commands that create part of a word
// LaTeX commands that create characters
['aa', 'å'],
['AA', 'Å'],
['ae', 'æ'],
@@ -63,19 +67,36 @@ const replacementsMap: Map<string, string> = new Map([
['NG', 'Ŋ'],
['i', 'ı'],
['j', 'ȷ'],
// reserved characters
['&', '&'],
['$', '$'],
['%', '%'],
['#', '#'],
['_', '_'],
// modifier commands for the character in the arguments
['H', 'a'],
['c', 'a'],
['d', 'a'],
['k', 'a'],
['v', 'a'],
['{', '{'],
['}', '}'],
// modifier commands for the subsequent character(s) (in braces)
['H', 'ő'], // long Hungarian umlaut (double acute)
['b', 'o'], // bar under the letter
['c', 'ç'], // cedilla
['d', 'o'], // dot under the letter
['k', 'ą'], // ogonek
['r', 'å'], // ring over the letter
['t', 'o͡o'], // "tie" over the two letters
['u', 'ŏ'], // breve over the letter
['v', 'š'], // caron/háček over the letter
// modifier symbols for the subsequent character
["'", ''],
['^', ''],
['"', ''],
['=', ''],
['.', ''],
["'", ''], // acute
['^', ''], // circumflex
['"', ''], // umlaut, trema or dieresis
['=', ''], // macron accent (a bar over the letter)
['.', ''], // dot over the letter
['`', ''], // grave
['~', ''], // tilde
// commands that create text
['TeX', 'TeX'],
['LaTeX', 'LaTeX'],
['textbackslash', '\\'],
])
type TextNode = {
@@ -87,7 +108,7 @@ type TextNode = {
export const countWordsInFile = (
data: WordCountData,
projectSnapshot: ProjectSnapshot,
projectSnapshot: { getDocContents(path: string): string | null },
docPath: string,
segmenters: Segmenters
) => {
@@ -106,10 +127,9 @@ export const countWordsInFile = (
const iterateNode = (nodeRef: SyntaxNodeRef, context: Context = 'text') => {
const previousContext = currentContext
currentContext = context
const { node } = nodeRef
node.cursor().iterate(childNodeRef => {
nodeRef.node.cursor().iterate(childNodeRef => {
// TODO: a better way to iterate only descendants?
if (childNodeRef.node !== node) {
if (childNodeRef.node !== nodeRef.node) {
return bodyMatcher(childNodeRef.type)?.(childNodeRef)
}
})
@@ -141,34 +161,72 @@ export const countWordsInFile = (
const child = nodeRef.node.getChild('UnknownCommand')
if (!child) return
const grandchild = child.getChild('CtrlSeq') ?? child.getChild('CtrlSym')
const grandchild =
child.getChild('$CtrlSeq') ?? child.getChild('$CtrlSym')
if (!grandchild) return
const commandName = content.substring(grandchild.from + 1, grandchild.to)
if (!commandName) return
switch (commandName) {
case 'thanks':
iterateNode(nodeRef, 'other')
return false
}
if (!replacementsMap.has(commandName)) return
// TODO: handle accented character in braces after a CtrlSym, e.g. \'{a}
// TODO: handle markup within words, e.g. inter\textbf{nal}formatting
// TODO: handle commands like \egrave and \eacute
const text = replacementsMap.get(commandName)!
textNodes.push({
from: nodeRef.from,
to: nodeRef.to,
text,
context: currentContext,
})
return false
},
BeginEnv(nodeRef) {
const envName = content
?.substring(nodeRef.from + '\\begin{'.length, nodeRef.to - 1)
.replace(/\*$/, '')
$Environment(nodeRef) {
const envNameNode = nodeRef.node
.getChild('BeginEnv')
?.getChild('EnvNameGroup')
?.getChild('EnvName')
if (envName === 'abstract') {
data.headers++
iterateNode(nodeRef, 'abstract')
return false
if (envNameNode) {
const envName = content
?.substring(envNameNode.from, envNameNode.to)
.replace(/\*$/, '')
if (envName === 'abstract') {
data.headers++
const contentNode = nodeRef.node.getChild('Content')
if (contentNode) {
iterateNode(contentNode, 'abstract')
}
return false
}
}
},
BeginEnv() {
return false // ignore text in \begin arguments
},
Math(nodeRef) {
const parent = nodeRef.node.parent
if (parent?.type.is('InlineMath') || parent?.type.is('ParenMath')) {
data.mathInline++
} else {
data.mathDisplay++
}
return false // TODO: count \text in math nodes?
},
'ShortTextArgument ShortOptionalArg'() {
return false
},
@@ -177,12 +235,6 @@ export const countWordsInFile = (
iterateNode(nodeRef, 'header')
return false
},
'DisplayMath BracketMath'() {
data.mathDisplay++
},
'InlineMath ParenMath'() {
data.mathInline++
},
Caption(nodeRef) {
iterateNode(nodeRef, 'caption')
return false
@@ -201,6 +253,14 @@ export const countWordsInFile = (
countWordsInFile(data, projectSnapshot, path, segmenters)
}
},
'BlankLine LineBreak'(nodeRef) {
textNodes.push({
from: nodeRef.from,
to: nodeRef.to,
text: '\n',
context: currentContext,
})
},
})
const preambleExtent = findPreambleExtent(tree)
@@ -226,6 +286,7 @@ export const countWordsInFile = (
caption: '',
text: '',
footnote: '',
other: '',
}
let pos = 0
@@ -240,12 +301,18 @@ export const countWordsInFile = (
for (const [context, text] of Object.entries(texts)) {
const counter = counters[context as Context]
for (const value of segmenters.word.segment(text)) {
// TODO: replace - and _ with a word character if hyphenated words should be counted as one word?
for (const value of segmenters.word.segment(
text.replace(/\w[-_]\w/g, 'aaa')
)) {
if (value.isWordLike) {
data[counter.word]++
}
}
// TODO: count hyphens as characters?
for (const value of segmenters.character.segment(text)) {
// TODO: option for whether to include whitespace?
if (!whiteSpaceRe.test(value.segment)) {

View File

@@ -1,5 +1,5 @@
const wordRe = /['\-.\p{L}]+/gu
const wordLikeRe = /\p{L}/gu // must contain at least one "letter" to be a word
const wordLikeRe = /\p{L}/u // must contain at least one "letter" to be a word
const characterRe = /\S/gu
type SegmentDataLike = {

View File

@@ -274,6 +274,7 @@
"cant_see_what_youre_looking_for_question": "Cant see what youre looking for?",
"caption_above": "Caption above",
"caption_below": "Caption below",
"captions": "Captions",
"card_details": "Card details",
"card_details_are_not_valid": "Card details are not valid",
"card_must_be_authenticated_by_3dsecure": "Your card must be authenticated with 3D Secure before continuing",
@@ -769,6 +770,7 @@
"footer_contact_us": "Contact us",
"footer_navigation": "Footer navigation",
"footer_plans_and_pricing": "Plans & pricing",
"footnotes": "Footnotes",
"for_business": "For business",
"for_enterprise": "For enterprise",
"for_government": "For government",
@@ -2234,6 +2236,7 @@
"test_configuration_successful": "Test configuration successful",
"tex_live_version": "TeX Live version",
"texgpt": "TexGPT",
"text": "Text",
"thank_you": "Thank you!",
"thank_you_email_confirmed": "Thank you, your email is now confirmed",
"thank_you_exclamation": "Thank you!",

View File

@@ -0,0 +1,72 @@
import { readFile } from 'node:fs/promises'
import path from 'node:path'
import { countWordsInFile } from '@/features/word-count-modal/utils/count-words-in-file'
import { WordCountData } from '@/features/word-count-modal/components/word-count-data'
import { createSegmenters } from '@/features/word-count-modal/utils/segmenters'
import { expect } from 'chai'
// Runs the client-side word counter over the LaTeX fixture that sits next to
// this test file and checks the per-category word/character counters.
describe('word count', function () {
  const fixtureName = 'word-count.tex'

  beforeEach(async function () {
    // Fresh, zeroed counters for every test; `satisfies` keeps the literal in
    // sync with the WordCountData shape.
    this.data = {
      encode: '',
      textWords: 0,
      headWords: 0,
      outside: 0,
      headers: 0,
      elements: 0,
      mathInline: 0,
      mathDisplay: 0,
      errors: 0,
      messages: '',
      textCharacters: 0,
      headCharacters: 0,
      captionWords: 0,
      captionCharacters: 0,
      footnoteWords: 0,
      footnoteCharacters: 0,
      abstractWords: 0,
      abstractCharacters: 0,
      otherWords: 0,
      otherCharacters: 0,
    } satisfies WordCountData

    const fixtureText = await readFile(
      path.join(__dirname, fixtureName),
      'utf-8'
    )
    const content = { [fixtureName]: fixtureText }

    // Minimal stand-in for the project snapshot: it only needs to hand back
    // the contents of the fixture document.
    this.projectSnapshot = {
      getDocContents: (docPath: keyof typeof content) => content[docPath],
    }

    this.segmenters = createSegmenters('en_US')
  })

  it('produces correct counts', function () {
    const expectedCounts = {
      abstractCharacters: 8,
      abstractWords: 2,
      captionCharacters: 16,
      captionWords: 4,
      footnoteCharacters: 8,
      footnoteWords: 2,
      headCharacters: 296,
      headWords: 52,
      otherCharacters: 10,
      otherWords: 2,
      textCharacters: 193,
      textWords: 42,
    }

    // countWordsInFile mutates this.data in place.
    countWordsInFile(
      this.data,
      this.projectSnapshot,
      fixtureName,
      this.segmenters
    )

    expect(this.data).to.deep.include(expectedCounts)
  })
})

View File

@@ -0,0 +1,118 @@
\documentclass{article}
\usepackage{graphicx}
\usepackage{amsmath}
\title{The Title} % 2 in headers
\author{An Author} % 0
\date{May 2025} % 0
\begin{document}
\maketitle % 0
\thanks{bleep bloop}
\begin{abstract}
Word word % 2 in abstract
\end{abstract}
\section{plain text}
Word word % 2
\section{accents}
w\'ard w\`erd w\"ird w\~ord w\.urd w\^ord % 6
\section{accents with groups}
% w\'{a}rd w\`{e}rd w\"{i}rd w\~{o}rd w\.{u}rd w\^{o}rd w\c{o}rd w\u{o}rd % 8 % TODO
\section{grouped single character command}
% w{\o}rd w\~{\o}rd % 2 % TODO
\section{commands that create characters}
\o\oe\aa\AE % 1
\subsection{with braces}
w\o{}rd w\oe{}rd w\aa{}rd w\AE{}rd % 4
\subsection{with spaces}
w\o rd w\oe rd w\ae rd w\AE rd % 4
\section{symbols}
\S{} \P{} % 0
\section{formatting}
\textit{italic} \textbf{bold} \textit{\textbf{bold italic}} % 4
\texttt{teletype} \textsf{sans-serif} \textsc{small caps} % 4
\textsf{-} % 0
\section{formatting inside word}
% wo\textit{italic}rd %1 % TODO
\section{commands that create text}
\textbackslash{}word \LaTeX{} % 2
\section{special characters}
word\&word \$word word\% \#word wo\_rd \{word\} % 7
\section{footnote}
\footnote{word word} % 2 in footnote
\section{headers}
\part{word} % 1
\chapter{word} % 1
\section{word} % 1
\subsection{word} % 1
\subsubsection{word} % 1
\paragraph{word} % 1
\section{verbatim}
\verb|word word word| % 0
\section{list}
\begin{itemize}
\item word % 1
\item word % 1
\end{itemize}
\section{figure}
\begin{figure}
\includegraphics[width=0.5\linewidth]{example.png} %0
\caption{Word word} %2 in captions
\end{figure}
\section{table}
\begin{table}
\begin{tabular}{c|c}
word & word \\ % 2
word & word % 2
\end{tabular}
\caption{Word word} % 2 in captions
\end{table}
\section{line break}
word\\word % 2
\section{inline math}
$2+3=5$ % 0
\section{display math}
\[2+3=5\]
\begin{equation}
2+3=5
\end{equation}
\begin{equation*}
2+3=5
\end{equation*}
\begin{align*}
2x - 5y &= 8 \\
3x + 9y &= -12
\end{align*}
\section{text in math}
$ 2+3 \text{ is equal to } 5 $
\end{document}