diff --git a/services/web/frontend/extracted-translations.json b/services/web/frontend/extracted-translations.json index df46529ffe..bf0b0f3768 100644 --- a/services/web/frontend/extracted-translations.json +++ b/services/web/frontend/extracted-translations.json @@ -32,6 +32,7 @@ "about_to_leave_projects": "", "about_to_trash_projects": "", "about_writefull": "", + "abstract": "", "accept_and_continue": "", "accept_change": "", "accept_change_error_description": "", @@ -209,6 +210,7 @@ "cant_see_what_youre_looking_for_question": "", "caption_above": "", "caption_below": "", + "captions": "", "card_details": "", "card_details_are_not_valid": "", "card_must_be_authenticated_by_3dsecure": "", @@ -588,6 +590,7 @@ "footer_about_us": "", "footer_contact_us": "", "footer_navigation": "", + "footnotes": "", "for_enterprise": "", "for_government": "", "for_individuals_and_groups": "", @@ -1723,6 +1726,7 @@ "test_configuration_successful": "", "tex_live_version": "", "texgpt": "", + "text": "", "thank_you": "", "thank_you_exclamation": "", "thank_you_for_your_feedback": "", @@ -1860,6 +1864,7 @@ "tooltip_show_filetree": "", "tooltip_show_panel": "", "tooltip_show_pdf": "", + "total": "", "total_due_in_x_days": "", "total_due_today": "", "total_per_month": "", diff --git a/services/web/frontend/js/features/word-count-modal/components/word-count-client.tsx b/services/web/frontend/js/features/word-count-modal/components/word-count-client.tsx index 5f2660d4a8..899d8de2f5 100644 --- a/services/web/frontend/js/features/word-count-modal/components/word-count-client.tsx +++ b/services/web/frontend/js/features/word-count-modal/components/word-count-client.tsx @@ -11,8 +11,8 @@ import { debugConsole } from '@/utils/debugging' import { signalWithTimeout } from '@/utils/abort-signal' import { isMainFile } from '@/features/pdf-preview/util/editor-files' import { countWordsInFile } from '@/features/word-count-modal/utils/count-words-in-file' -import { WordCounts } from '@/features/word-count-modal/components/word-counts' import { createSegmenters } from '@/features/word-count-modal/utils/segmenters' +import { WordCountsClient } from './word-counts-client' export const WordCountClient: FC = () => { const [loading, setLoading] = useState(true) @@ -59,7 +59,8 @@ export const WordCountClient: FC = () => { footnoteWords: 0, footnoteCharacters: 0, outside: 0, - outsideCharacters: 0, + otherWords: 0, + otherCharacters: 0, headers: 0, elements: 0, mathInline: 0, @@ -99,7 +100,7 @@ export const WordCountClient: FC = () => { <> {loading && !error && } {error && } - {data && } + {data && } ) } diff --git a/services/web/frontend/js/features/word-count-modal/components/word-count-data.ts b/services/web/frontend/js/features/word-count-modal/components/word-count-data.ts index a69dc55d45..e322ac9636 100644 --- a/services/web/frontend/js/features/word-count-modal/components/word-count-data.ts +++ b/services/web/frontend/js/features/word-count-modal/components/word-count-data.ts @@ -20,6 +20,6 @@ export type WordCountData = ServerWordCountData & { footnoteCharacters: number abstractWords: number abstractCharacters: number - // outsideWords: number - outsideCharacters: number + otherWords: number + otherCharacters: number } diff --git a/services/web/frontend/js/features/word-count-modal/components/word-count-server.tsx b/services/web/frontend/js/features/word-count-modal/components/word-count-server.tsx index fccd49f8c3..a34807864e 100644 --- a/services/web/frontend/js/features/word-count-modal/components/word-count-server.tsx +++ b/services/web/frontend/js/features/word-count-modal/components/word-count-server.tsx @@ -42,7 +42,7 @@ export const WordCountServer: FC = () => { <> {loading && !error && } {error && } - {data && } + {data && } ) } diff --git a/services/web/frontend/js/features/word-count-modal/components/word-counts-client.tsx b/services/web/frontend/js/features/word-count-modal/components/word-counts-client.tsx new file mode 100644 index 0000000000..4c4aae65f7 --- /dev/null +++ b/services/web/frontend/js/features/word-count-modal/components/word-counts-client.tsx @@ -0,0 +1,138 @@ +import { FC, useMemo } from 'react' +import { WordCountData } from '@/features/word-count-modal/components/word-count-data' +import { useTranslation } from 'react-i18next' +import { Container, Row, Col, Form } from 'react-bootstrap-5' +import OLNotification from '@/features/ui/components/ol/ol-notification' +import usePersistedState from '@/shared/hooks/use-persisted-state' + +export const WordCountsClient: FC<{ data: WordCountData }> = ({ data }) => { + const { t } = useTranslation() + + const [included, setIncluded] = usePersistedState( + 'word-count-total', + ['text'] + ) + + const items = useMemo(() => { + return [ + { + key: 'text', + label: t('text'), + words: data.textWords, + chars: data.textCharacters, + }, + { + key: 'headers', + label: t('headers'), + words: data.headWords, + chars: data.headCharacters, + }, + { + key: 'abstract', + label: t('abstract'), + words: data.abstractWords, + chars: data.abstractCharacters, + }, + { + key: 'captions', + label: t('captions'), + words: data.captionWords, + chars: data.captionCharacters, + }, + { + key: 'footnotes', + label: t('footnotes'), + words: data.footnoteWords, + chars: data.footnoteCharacters, + }, + { + key: 'other', + label: t('other'), + words: data.otherWords, + chars: data.otherCharacters, + }, + ] + }, [data, t]) + + const totals = useMemo(() => { + const totals = { + words: 0, + chars: 0, + } + + for (const item of items) { + if (included.includes(item.key)) { + totals.words += item.words + totals.chars += item.chars + } + } + + return totals + }, [included, items]) + + return ( + + {data.messages && ( + + + {data.messages}

+ } + /> + +
+ )} + + {items.map(item => ( + + + + setIncluded(prevValue => { + return event.target.checked + ? prevValue.concat(item.key) + : prevValue.filter(key => key !== item.key) + }) + } + aria-label={`Include ${item.label} in total`} + /> + + + {item.words} words +
+ {item.chars} chars + +
+ ))} + + + + + {t('total')}: {totals.words} words +
+ {totals.chars} chars +
+ +
+
+ ) +} diff --git a/services/web/frontend/js/features/word-count-modal/components/word-counts.tsx b/services/web/frontend/js/features/word-count-modal/components/word-counts.tsx index dec4d2e6d8..16d96525ac 100644 --- a/services/web/frontend/js/features/word-count-modal/components/word-counts.tsx +++ b/services/web/frontend/js/features/word-count-modal/components/word-counts.tsx @@ -1,22 +1,12 @@ -import { - ServerWordCountData, - WordCountData, -} from '@/features/word-count-modal/components/word-count-data' -import { useTranslation } from 'react-i18next' import { FC } from 'react' +import { ServerWordCountData } from '@/features/word-count-modal/components/word-count-data' +import { useTranslation } from 'react-i18next' import { Container, Row, Col } from 'react-bootstrap-5' import OLNotification from '@/features/ui/components/ol/ol-notification' -export const WordCounts: FC< - | { - data: ServerWordCountData - source: 'server' - } - | { - data: WordCountData - source: 'client' - } -> = ({ data, source }) => { +export const WordCounts: FC<{ + data: ServerWordCountData +}> = ({ data }) => { const { t } = useTranslation() return ( @@ -34,69 +24,32 @@ export const WordCounts: FC< )} - {source === 'client' ? ( - <> - - -
Text:
- - {data.textWords} -
+ + +
{t('total_words')}:
+ + {data.textWords} +
+ + +
{t('headers')}:
+ + {data.headers} +
- - -
Headers:
- - {data.headWords} -
+ + +
{t('math_inline')}:
+ + {data.mathInline} +
- - -
Captions:
- - {data.captionWords} -
- - - -
Footnotes:
- - {data.footnoteWords} -
- - ) : ( - - -
{t('total_words')}:
- - {data.textWords} -
- )} - - {source === 'server' && ( - <> - - -
{t('headers')}:
- - {data.headers} -
- - - -
{t('math_inline')}:
- - {data.mathInline} -
- - - -
{t('math_display')}:
- - {data.mathDisplay} -
- - )} + + +
{t('math_display')}:
+ + {data.mathDisplay} +
) } diff --git a/services/web/frontend/js/features/word-count-modal/utils/count-words-in-file.ts b/services/web/frontend/js/features/word-count-modal/utils/count-words-in-file.ts index d9a9154620..a14120d8d5 100644 --- a/services/web/frontend/js/features/word-count-modal/utils/count-words-in-file.ts +++ b/services/web/frontend/js/features/word-count-modal/utils/count-words-in-file.ts @@ -1,4 +1,3 @@ -import { ProjectSnapshot } from '@/infrastructure/project-snapshot' import { LaTeXLanguage } from '@/features/source-editor/languages/latex/latex-language' import { WordCountData } from '@/features/word-count-modal/components/word-count-data' import { NodeType, SyntaxNodeRef } from '@lezer/common' @@ -8,7 +7,7 @@ import { Segmenters } from './segmenters' const whiteSpaceRe = /^\s$/ -type Context = 'text' | 'header' | 'abstract' | 'caption' | 'footnote' +type Context = 'text' | 'header' | 'abstract' | 'caption' | 'footnote' | 'other' const counters: Record< Context, @@ -37,10 +36,15 @@ const counters: Record< word: 'footnoteWords', character: 'footnoteCharacters', }, + other: { + word: 'otherWords', + character: 'otherCharacters', + }, } +// https://en.wikibooks.org/wiki/LaTeX/Special_Characters#Escaped_codes const replacementsMap: Map = new Map([ - // LaTeX commands that create part of a word + // LaTeX commands that create characters ['aa', 'å'], ['AA', 'Å'], ['ae', 'æ'], @@ -63,19 +67,36 @@ const replacementsMap: Map = new Map([ ['NG', 'Ŋ'], ['i', 'ı'], ['j', 'ȷ'], + // reserved characters + ['&', '&'], + ['$', '$'], + ['%', '%'], + ['#', '#'], ['_', '_'], - // modifier commands for the character in the arguments - ['H', 'a'], - ['c', 'a'], - ['d', 'a'], - ['k', 'a'], - ['v', 'a'], + ['{', '{'], + ['}', '}'], + // modifier commands for the subsequent character(s) (in braces) + ['H', 'ő'], // long Hungarian umlaut (double acute) + ['b', 'o'], // bar under the letter + ['c', 'ç'], // cedilla + ['d', 'o'], // dot under the letter + ['k', 'ą'], // ogonek + ['r', 'å'], // ring over the letter + ['t', 'o͡o'], // "tie" over the two letters + ['u', 'ŏ'], // breve over the letter + ['v', 'š'], // caron/háček over the letter // modifier symbols for the subsequent character - ["'", ''], - ['^', ''], - ['"', ''], - ['=', ''], - ['.', ''], + ["'", ''], // acute + ['^', ''], // circumflex + ['"', ''], // umlaut, trema or dieresis + ['=', ''], // macron accent (a bar over the letter) + ['.', ''], // dot over the letter + ['`', ''], // grave + ['~', ''], // tilde + // commands that create text + ['TeX', 'TeX'], + ['LaTeX', 'LaTeX'], + ['textbackslash', '\\'], ]) type TextNode = { @@ -87,7 +108,7 @@ type TextNode = { export const countWordsInFile = ( data: WordCountData, - projectSnapshot: ProjectSnapshot, + projectSnapshot: { getDocContents(path: string): string | null }, docPath: string, segmenters: Segmenters ) => { @@ -106,10 +127,9 @@ export const countWordsInFile = ( const iterateNode = (nodeRef: SyntaxNodeRef, context: Context = 'text') => { const previousContext = currentContext currentContext = context - const { node } = nodeRef - node.cursor().iterate(childNodeRef => { + nodeRef.node.cursor().iterate(childNodeRef => { // TODO: a better way to iterate only descendants? - if (childNodeRef.node !== node) { + if (childNodeRef.node !== nodeRef.node) { return bodyMatcher(childNodeRef.type)?.(childNodeRef) } }) @@ -141,34 +161,72 @@ export const countWordsInFile = ( const child = nodeRef.node.getChild('UnknownCommand') if (!child) return - const grandchild = child.getChild('CtrlSeq') ?? child.getChild('CtrlSym') + const grandchild = + child.getChild('$CtrlSeq') ?? child.getChild('$CtrlSym') if (!grandchild) return const commandName = content.substring(grandchild.from + 1, grandchild.to) if (!commandName) return + switch (commandName) { + case 'thanks': + iterateNode(nodeRef, 'other') + return false + } + if (!replacementsMap.has(commandName)) return + // TODO: handle accented character in braces after a CtrlSym, e.g. \'{a} + // TODO: handle markup within words, e.g. inter\textbf{nal}formatting + // TODO: handle commands like \egrave and \eacute + const text = replacementsMap.get(commandName)! + textNodes.push({ from: nodeRef.from, to: nodeRef.to, text, context: currentContext, }) + return false }, - BeginEnv(nodeRef) { - const envName = content - ?.substring(nodeRef.from + '\\begin{'.length, nodeRef.to - 1) - .replace(/\*$/, '') + $Environment(nodeRef) { + const envNameNode = nodeRef.node + .getChild('BeginEnv') + ?.getChild('EnvNameGroup') + ?.getChild('EnvName') - if (envName === 'abstract') { - data.headers++ - iterateNode(nodeRef, 'abstract') - return false + if (envNameNode) { + const envName = content + ?.substring(envNameNode.from, envNameNode.to) + .replace(/\*$/, '') + + if (envName === 'abstract') { + data.headers++ + + const contentNode = nodeRef.node.getChild('Content') + if (contentNode) { + iterateNode(contentNode, 'abstract') + } + + return false + } } }, + BeginEnv() { + return false // ignore text in \begin arguments + }, + Math(nodeRef) { + const parent = nodeRef.node.parent + if (parent?.type.is('InlineMath') || parent?.type.is('ParenMath')) { + data.mathInline++ + } else { + data.mathDisplay++ + } + + return false // TODO: count \text in math nodes? + }, 'ShortTextArgument ShortOptionalArg'() { return false }, @@ -177,12 +235,6 @@ export const countWordsInFile = ( iterateNode(nodeRef, 'header') return false }, - 'DisplayMath BracketMath'() { - data.mathDisplay++ - }, - 'InlineMath ParenMath'() { - data.mathInline++ - }, Caption(nodeRef) { iterateNode(nodeRef, 'caption') return false @@ -201,6 +253,14 @@ export const countWordsInFile = ( countWordsInFile(data, projectSnapshot, path, segmenters) } }, + 'BlankLine LineBreak'(nodeRef) { + textNodes.push({ + from: nodeRef.from, + to: nodeRef.to, + text: '\n', + context: currentContext, + }) + }, }) const preambleExtent = findPreambleExtent(tree) @@ -226,6 +286,7 @@ export const countWordsInFile = ( caption: '', text: '', footnote: '', + other: '', } let pos = 0 @@ -240,12 +301,18 @@ export const countWordsInFile = ( for (const [context, text] of Object.entries(texts)) { const counter = counters[context as Context] - for (const value of segmenters.word.segment(text)) { + // TODO: replace - and _ with a word character if hyphenated words should be counted as one word? + + for (const value of segmenters.word.segment( + text.replace(/\w[-_]\w/g, 'aaa') + )) { if (value.isWordLike) { data[counter.word]++ } } + // TODO: count hyphens as characters? + for (const value of segmenters.character.segment(text)) { // TODO: option for whether to include whitespace? if (!whiteSpaceRe.test(value.segment)) { diff --git a/services/web/frontend/js/features/word-count-modal/utils/segmenters.ts b/services/web/frontend/js/features/word-count-modal/utils/segmenters.ts index e14c3365fd..7e52e1e6fa 100644 --- a/services/web/frontend/js/features/word-count-modal/utils/segmenters.ts +++ b/services/web/frontend/js/features/word-count-modal/utils/segmenters.ts @@ -1,5 +1,5 @@ const wordRe = /['\-.\p{L}]+/gu -const wordLikeRe = /\p{L}/gu // must contain at least one "letter" to be a word +const wordLikeRe = /\p{L}/u // must contain at least one "letter" to be a word const characterRe = /\S/gu type SegmentDataLike = { diff --git a/services/web/locales/en.json b/services/web/locales/en.json index 2735e04205..30ac3c472e 100644 --- a/services/web/locales/en.json +++ b/services/web/locales/en.json @@ -274,6 +274,7 @@ "cant_see_what_youre_looking_for_question": "Can’t see what you’re looking for?", "caption_above": "Caption above", "caption_below": "Caption below", + "captions": "Captions", "card_details": "Card details", "card_details_are_not_valid": "Card details are not valid", "card_must_be_authenticated_by_3dsecure": "Your card must be authenticated with 3D Secure before continuing", @@ -769,6 +770,7 @@ "footer_contact_us": "Contact us", "footer_navigation": "Footer navigation", "footer_plans_and_pricing": "Plans & pricing", + "footnotes": "Footnotes", "for_business": "For business", "for_enterprise": "For enterprise", "for_government": "For government", @@ -2234,6 +2236,7 @@ "test_configuration_successful": "Test configuration successful", "tex_live_version": "TeX Live version", "texgpt": "TexGPT", + "text": "Text", "thank_you": "Thank you!", "thank_you_email_confirmed": "Thank you, your email is now confirmed", "thank_you_exclamation": "Thank you!", diff --git a/services/web/test/frontend/features/word-count-modal/utils/count-words-in-file.test.ts b/services/web/test/frontend/features/word-count-modal/utils/count-words-in-file.test.ts new file mode 100644 index 0000000000..32c7b72c7d --- /dev/null +++ b/services/web/test/frontend/features/word-count-modal/utils/count-words-in-file.test.ts @@ -0,0 +1,72 @@ +import { readFile } from 'node:fs/promises' +import path from 'node:path' +import { countWordsInFile } from '@/features/word-count-modal/utils/count-words-in-file' +import { WordCountData } from '@/features/word-count-modal/components/word-count-data' +import { createSegmenters } from '@/features/word-count-modal/utils/segmenters' +import { expect } from 'chai' + +describe('word count', function () { + beforeEach(async function () { + this.data = { + encode: '', + textWords: 0, + headWords: 0, + outside: 0, + headers: 0, + elements: 0, + mathInline: 0, + mathDisplay: 0, + errors: 0, + messages: '', + textCharacters: 0, + headCharacters: 0, + captionWords: 0, + captionCharacters: 0, + footnoteWords: 0, + footnoteCharacters: 0, + abstractWords: 0, + abstractCharacters: 0, + otherWords: 0, + otherCharacters: 0, + } satisfies WordCountData + + const content = { + 'word-count.tex': await readFile( + path.join(__dirname, 'word-count.tex'), + 'utf-8' + ), + } + + this.projectSnapshot = { + getDocContents(path: keyof typeof content) { + return content[path] + }, + } + + this.segmenters = createSegmenters('en_US') + }) + + it('produces correct counts', function () { + countWordsInFile( + this.data, + this.projectSnapshot, + 'word-count.tex', + this.segmenters + ) + + expect(this.data).to.deep.include({ + abstractCharacters: 8, + abstractWords: 2, + captionCharacters: 16, + captionWords: 4, + footnoteCharacters: 8, + footnoteWords: 2, + headCharacters: 296, + headWords: 52, + otherCharacters: 10, + otherWords: 2, + textCharacters: 193, + textWords: 42, + }) + }) +}) diff --git a/services/web/test/frontend/features/word-count-modal/utils/word-count.tex b/services/web/test/frontend/features/word-count-modal/utils/word-count.tex new file mode 100644 index 0000000000..4522d6a3bc --- /dev/null +++ b/services/web/test/frontend/features/word-count-modal/utils/word-count.tex @@ -0,0 +1,118 @@ +\documentclass{article} +\usepackage{graphicx} +\usepackage{amsmath} + +\title{The Title} % 2 in headers +\author{An Author} % 0 +\date{May 2025} % 0 + +\begin{document} + +\maketitle % 0 + +\thanks{bleep bloop} + +\begin{abstract} +Word word % 2 in abstract +\end{abstract} + +\section{plain text} +Word word % 2 + +\section{accents} +w\'ard w\`erd w\"ird w\~ord w\.urd w\^ord % 6 + +\section{accents with groups} +% w\'{a}rd w\`{e}rd w\"{i}rd w\~{o}rd w\.{u}rd w\^{o}rd w\c{o}rd w\u{o}rd % 8 % TODO + +\section{grouped single character command} +% w{\o}rd w\~{\o}rd % 2 % TODO + +\section{commands that create characters} +\o\oe\aa\AE % 1 + +\subsection{with braces} +w\o{}rd w\oe{}rd w\aa{}rd w\AE{}rd % 4 + +\subsection{with spaces} +w\o rd w\oe rd w\ae rd w\AE rd % 4 + +\section{symbols} +\S{} \P{} % 0 + +\section{formatting} +\textit{italic} \textbf{bold} \textit{\textbf{bold italic}} % 4 +\texttt{teletype} \textsf{sans-serif} \textsc{small caps} % 4 +\textsf{-} % 0 + +\section{formatting inside word} +% wo\textit{italic}rd %1 % TODO + +\section{commands that create text} +\textbackslash{}word \LaTeX{} % 2 + +\section{special characters} +word\&word \$word word\% \#word wo\_rd \{word\} % 7 + +\section{footnote} +\footnote{word word} % 2 in footnote + +\section{headers} +\part{word} % 1 +\chapter{word} % 1 +\section{word} % 1 +\subsection{word} % 1 +\subsubsection{word} % 1 +\paragraph{word} % 1 + +\section{verbatim} +\verb|word word word| % 0 + +\section{list} +\begin{itemize} + \item word % 1 + \item word % 1 +\end{itemize} + +\section{figure} +\begin{figure} + \includegraphics[width=0.5\linewidth]{example.png} %0 + \caption{Word word} %2 in captions +\end{figure} + +\section{table} +\begin{table} + \begin{tabular}{c|c} + word & word \\ % 2 + word & word % 2 + \end{tabular} + \caption{Word word} % 2 in captions +\end{table} + +\section{line break} +word\\word % 2 + +\section{inline math} +$2+3=5$ % 0 + +\section{display math} +\[2+3=5\] + +\begin{equation} + 2+3=5 +\end{equation} + +\begin{equation*} + 2+3=5 +\end{equation*} + +\begin{align*} +2x - 5y &= 8 \\ +3x + 9y &= -12 +\end{align*} + +\section{text in math} + +$ 2+3 \text{ is equal to } 5 $ + +\end{document}