mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-23 17:19:37 +02:00
Add more detail to word count UI (#25400)
GitOrigin-RevId: 3521f2ea03332e46ef1bac634ce0650cdce01249
This commit is contained in:
@@ -32,6 +32,7 @@
|
||||
"about_to_leave_projects": "",
|
||||
"about_to_trash_projects": "",
|
||||
"about_writefull": "",
|
||||
"abstract": "",
|
||||
"accept_and_continue": "",
|
||||
"accept_change": "",
|
||||
"accept_change_error_description": "",
|
||||
@@ -209,6 +210,7 @@
|
||||
"cant_see_what_youre_looking_for_question": "",
|
||||
"caption_above": "",
|
||||
"caption_below": "",
|
||||
"captions": "",
|
||||
"card_details": "",
|
||||
"card_details_are_not_valid": "",
|
||||
"card_must_be_authenticated_by_3dsecure": "",
|
||||
@@ -588,6 +590,7 @@
|
||||
"footer_about_us": "",
|
||||
"footer_contact_us": "",
|
||||
"footer_navigation": "",
|
||||
"footnotes": "",
|
||||
"for_enterprise": "",
|
||||
"for_government": "",
|
||||
"for_individuals_and_groups": "",
|
||||
@@ -1723,6 +1726,7 @@
|
||||
"test_configuration_successful": "",
|
||||
"tex_live_version": "",
|
||||
"texgpt": "",
|
||||
"text": "",
|
||||
"thank_you": "",
|
||||
"thank_you_exclamation": "",
|
||||
"thank_you_for_your_feedback": "",
|
||||
@@ -1860,6 +1864,7 @@
|
||||
"tooltip_show_filetree": "",
|
||||
"tooltip_show_panel": "",
|
||||
"tooltip_show_pdf": "",
|
||||
"total": "",
|
||||
"total_due_in_x_days": "",
|
||||
"total_due_today": "",
|
||||
"total_per_month": "",
|
||||
|
||||
@@ -11,8 +11,8 @@ import { debugConsole } from '@/utils/debugging'
|
||||
import { signalWithTimeout } from '@/utils/abort-signal'
|
||||
import { isMainFile } from '@/features/pdf-preview/util/editor-files'
|
||||
import { countWordsInFile } from '@/features/word-count-modal/utils/count-words-in-file'
|
||||
import { WordCounts } from '@/features/word-count-modal/components/word-counts'
|
||||
import { createSegmenters } from '@/features/word-count-modal/utils/segmenters'
|
||||
import { WordCountsClient } from './word-counts-client'
|
||||
|
||||
export const WordCountClient: FC = () => {
|
||||
const [loading, setLoading] = useState(true)
|
||||
@@ -59,7 +59,8 @@ export const WordCountClient: FC = () => {
|
||||
footnoteWords: 0,
|
||||
footnoteCharacters: 0,
|
||||
outside: 0,
|
||||
outsideCharacters: 0,
|
||||
otherWords: 0,
|
||||
otherCharacters: 0,
|
||||
headers: 0,
|
||||
elements: 0,
|
||||
mathInline: 0,
|
||||
@@ -99,7 +100,7 @@ export const WordCountClient: FC = () => {
|
||||
<>
|
||||
{loading && !error && <WordCountLoading />}
|
||||
{error && <WordCountError />}
|
||||
{data && <WordCounts data={data} source="client" />}
|
||||
{data && <WordCountsClient data={data} />}
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -20,6 +20,6 @@ export type WordCountData = ServerWordCountData & {
|
||||
footnoteCharacters: number
|
||||
abstractWords: number
|
||||
abstractCharacters: number
|
||||
// outsideWords: number
|
||||
outsideCharacters: number
|
||||
otherWords: number
|
||||
otherCharacters: number
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ export const WordCountServer: FC = () => {
|
||||
<>
|
||||
{loading && !error && <WordCountLoading />}
|
||||
{error && <WordCountError />}
|
||||
{data && <WordCounts data={data} source="server" />}
|
||||
{data && <WordCounts data={data} />}
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,138 @@
|
||||
import { FC, useMemo } from 'react'
|
||||
import { WordCountData } from '@/features/word-count-modal/components/word-count-data'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { Container, Row, Col, Form } from 'react-bootstrap-5'
|
||||
import OLNotification from '@/features/ui/components/ol/ol-notification'
|
||||
import usePersistedState from '@/shared/hooks/use-persisted-state'
|
||||
|
||||
/**
 * Breakdown of the client-side word count, one row per category
 * (text, headers, abstract, captions, footnotes, other).
 *
 * Each row has a checkbox that controls whether that category contributes to
 * the total shown at the bottom. The selected category keys are kept via
 * `usePersistedState` under the key `word-count-total`, defaulting to
 * `['text']`.
 *
 * @param data - Word/character counters to display; `data.messages`, when
 *   non-empty, is rendered in an error notification above the rows.
 */
export const WordCountsClient: FC<{ data: WordCountData }> = ({ data }) => {
  const { t } = useTranslation()

  const [included, setIncluded] = usePersistedState<string[]>(
    'word-count-total',
    ['text']
  )

  // Set view of the selected keys, shared by the totals and the checkboxes.
  const includedKeys = useMemo(() => new Set(included), [included])

  const items = useMemo(() => {
    return [
      {
        key: 'text',
        label: t('text'),
        words: data.textWords,
        chars: data.textCharacters,
      },
      {
        key: 'headers',
        label: t('headers'),
        words: data.headWords,
        chars: data.headCharacters,
      },
      {
        key: 'abstract',
        label: t('abstract'),
        words: data.abstractWords,
        chars: data.abstractCharacters,
      },
      {
        key: 'captions',
        label: t('captions'),
        words: data.captionWords,
        chars: data.captionCharacters,
      },
      {
        key: 'footnotes',
        label: t('footnotes'),
        words: data.footnoteWords,
        chars: data.footnoteCharacters,
      },
      {
        key: 'other',
        label: t('other'),
        words: data.otherWords,
        chars: data.otherCharacters,
      },
    ]
  }, [data, t])

  // Sum only the categories the user has ticked.
  const totals = useMemo(() => {
    const totals = {
      words: 0,
      chars: 0,
    }

    for (const item of items) {
      if (includedKeys.has(item.key)) {
        totals.words += item.words
        totals.chars += item.chars
      }
    }

    return totals
  }, [includedKeys, items])

  // NOTE(review): the "words" / "chars" suffixes and the aria-label below are
  // hard-coded English; they should probably go through `t()` once suitable
  // translation keys exist.
  return (
    <Container fluid>
      {data.messages && (
        <Row>
          <Col xs={12}>
            <OLNotification
              type="error"
              content={
                <p style={{ whiteSpace: 'pre-wrap' }}>{data.messages}</p>
              }
            />
          </Col>
        </Row>
      )}

      {items.map(item => (
        <Row
          key={item.key}
          style={{
            borderBottom: '1px solid #eee',
            padding: 5,
            marginBottom: 5,
          }}
        >
          <Col
            style={{
              display: 'flex',
              // 'top' is not a valid align-items keyword and would be dropped
              // by the browser; 'flex-start' is the top-alignment value.
              alignItems: 'flex-start',
              justifyContent: 'space-between',
            }}
          >
            <Form.Check
              type="checkbox"
              id={`word-count-${item.key}`}
              label={item.label}
              checked={includedKeys.has(item.key)}
              onChange={event => {
                // Read the checkbox state now rather than inside the updater,
                // which may run after the event has been handled.
                const { checked } = event.target

                setIncluded(prevValue => {
                  if (!checked) {
                    return prevValue.filter(key => key !== item.key)
                  }

                  // Adding is idempotent: never store the same key twice.
                  return prevValue.includes(item.key)
                    ? prevValue
                    : prevValue.concat(item.key)
                })
              }}
              aria-label={`Include ${item.label} in total`}
            />
          </Col>
          <Col>
            {item.words} words
            <br />
            {item.chars} chars
          </Col>
        </Row>
      ))}

      <Row>
        <Col style={{ textAlign: 'right' }}>
          <span style={{ fontWeight: 'bold' }}>
            {t('total')}: {totals.words} words
            <br />
            {totals.chars} chars
          </span>
        </Col>
      </Row>
    </Container>
  )
}
|
||||
@@ -1,22 +1,12 @@
|
||||
import {
|
||||
ServerWordCountData,
|
||||
WordCountData,
|
||||
} from '@/features/word-count-modal/components/word-count-data'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { FC } from 'react'
|
||||
import { ServerWordCountData } from '@/features/word-count-modal/components/word-count-data'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { Container, Row, Col } from 'react-bootstrap-5'
|
||||
import OLNotification from '@/features/ui/components/ol/ol-notification'
|
||||
|
||||
export const WordCounts: FC<
|
||||
| {
|
||||
data: ServerWordCountData
|
||||
source: 'server'
|
||||
}
|
||||
| {
|
||||
data: WordCountData
|
||||
source: 'client'
|
||||
}
|
||||
> = ({ data, source }) => {
|
||||
export const WordCounts: FC<{
|
||||
data: ServerWordCountData
|
||||
}> = ({ data }) => {
|
||||
const { t } = useTranslation()
|
||||
|
||||
return (
|
||||
@@ -34,69 +24,32 @@ export const WordCounts: FC<
|
||||
</Row>
|
||||
)}
|
||||
|
||||
{source === 'client' ? (
|
||||
<>
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">Text:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.textWords}</Col>
|
||||
</Row>
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">{t('total_words')}:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.textWords}</Col>
|
||||
</Row>
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">{t('headers')}:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.headers}</Col>
|
||||
</Row>
|
||||
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">Headers:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.headWords}</Col>
|
||||
</Row>
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">{t('math_inline')}:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.mathInline}</Col>
|
||||
</Row>
|
||||
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">Captions:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.captionWords}</Col>
|
||||
</Row>
|
||||
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">Footnotes:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.footnoteWords}</Col>
|
||||
</Row>
|
||||
</>
|
||||
) : (
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">{t('total_words')}:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.textWords}</Col>
|
||||
</Row>
|
||||
)}
|
||||
|
||||
{source === 'server' && (
|
||||
<>
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">{t('headers')}:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.headers}</Col>
|
||||
</Row>
|
||||
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">{t('math_inline')}:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.mathInline}</Col>
|
||||
</Row>
|
||||
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">{t('math_display')}:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.mathDisplay}</Col>
|
||||
</Row>
|
||||
</>
|
||||
)}
|
||||
<Row>
|
||||
<Col xs={4}>
|
||||
<div className="pull-right">{t('math_display')}:</div>
|
||||
</Col>
|
||||
<Col xs={6}>{data.mathDisplay}</Col>
|
||||
</Row>
|
||||
</Container>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import { ProjectSnapshot } from '@/infrastructure/project-snapshot'
|
||||
import { LaTeXLanguage } from '@/features/source-editor/languages/latex/latex-language'
|
||||
import { WordCountData } from '@/features/word-count-modal/components/word-count-data'
|
||||
import { NodeType, SyntaxNodeRef } from '@lezer/common'
|
||||
@@ -8,7 +7,7 @@ import { Segmenters } from './segmenters'
|
||||
|
||||
const whiteSpaceRe = /^\s$/
|
||||
|
||||
type Context = 'text' | 'header' | 'abstract' | 'caption' | 'footnote'
|
||||
type Context = 'text' | 'header' | 'abstract' | 'caption' | 'footnote' | 'other'
|
||||
|
||||
const counters: Record<
|
||||
Context,
|
||||
@@ -37,10 +36,15 @@ const counters: Record<
|
||||
word: 'footnoteWords',
|
||||
character: 'footnoteCharacters',
|
||||
},
|
||||
other: {
|
||||
word: 'otherWords',
|
||||
character: 'otherCharacters',
|
||||
},
|
||||
}
|
||||
|
||||
// https://en.wikibooks.org/wiki/LaTeX/Special_Characters#Escaped_codes
|
||||
const replacementsMap: Map<string, string> = new Map([
|
||||
// LaTeX commands that create part of a word
|
||||
// LaTeX commands that create characters
|
||||
['aa', 'å'],
|
||||
['AA', 'Å'],
|
||||
['ae', 'æ'],
|
||||
@@ -63,19 +67,36 @@ const replacementsMap: Map<string, string> = new Map([
|
||||
['NG', 'Ŋ'],
|
||||
['i', 'ı'],
|
||||
['j', 'ȷ'],
|
||||
// reserved characters
|
||||
['&', '&'],
|
||||
['$', '$'],
|
||||
['%', '%'],
|
||||
['#', '#'],
|
||||
['_', '_'],
|
||||
// modifier commands for the character in the arguments
|
||||
['H', 'a'],
|
||||
['c', 'a'],
|
||||
['d', 'a'],
|
||||
['k', 'a'],
|
||||
['v', 'a'],
|
||||
['{', '{'],
|
||||
['}', '}'],
|
||||
// modifier commands for the subsequent character(s) (in braces)
|
||||
['H', 'ő'], // long Hungarian umlaut (double acute)
|
||||
['b', 'o'], // bar under the letter
|
||||
['c', 'ç'], // cedilla
|
||||
['d', 'o'], // dot under the letter
|
||||
['k', 'ą'], // ogonek
|
||||
['r', 'å'], // ring over the letter
|
||||
['t', 'o͡o'], // "tie" over the two letters
|
||||
['u', 'ŏ'], // breve over the letter
|
||||
['v', 'š'], // caron/háček over the letter
|
||||
// modifier symbols for the subsequent character
|
||||
["'", ''],
|
||||
['^', ''],
|
||||
['"', ''],
|
||||
['=', ''],
|
||||
['.', ''],
|
||||
["'", ''], // acute
|
||||
['^', ''], // circumflex
|
||||
['"', ''], // umlaut, trema or dieresis
|
||||
['=', ''], // macron accent (a bar over the letter)
|
||||
['.', ''], // dot over the letter
|
||||
['`', ''], // grave
|
||||
['~', ''], // tilde
|
||||
// commands that create text
|
||||
['TeX', 'TeX'],
|
||||
['LaTeX', 'LaTeX'],
|
||||
['textbackslash', '\\'],
|
||||
])
|
||||
|
||||
type TextNode = {
|
||||
@@ -87,7 +108,7 @@ type TextNode = {
|
||||
|
||||
export const countWordsInFile = (
|
||||
data: WordCountData,
|
||||
projectSnapshot: ProjectSnapshot,
|
||||
projectSnapshot: { getDocContents(path: string): string | null },
|
||||
docPath: string,
|
||||
segmenters: Segmenters
|
||||
) => {
|
||||
@@ -106,10 +127,9 @@ export const countWordsInFile = (
|
||||
const iterateNode = (nodeRef: SyntaxNodeRef, context: Context = 'text') => {
|
||||
const previousContext = currentContext
|
||||
currentContext = context
|
||||
const { node } = nodeRef
|
||||
node.cursor().iterate(childNodeRef => {
|
||||
nodeRef.node.cursor().iterate(childNodeRef => {
|
||||
// TODO: a better way to iterate only descendants?
|
||||
if (childNodeRef.node !== node) {
|
||||
if (childNodeRef.node !== nodeRef.node) {
|
||||
return bodyMatcher(childNodeRef.type)?.(childNodeRef)
|
||||
}
|
||||
})
|
||||
@@ -141,34 +161,72 @@ export const countWordsInFile = (
|
||||
const child = nodeRef.node.getChild('UnknownCommand')
|
||||
if (!child) return
|
||||
|
||||
const grandchild = child.getChild('CtrlSeq') ?? child.getChild('CtrlSym')
|
||||
const grandchild =
|
||||
child.getChild('$CtrlSeq') ?? child.getChild('$CtrlSym')
|
||||
if (!grandchild) return
|
||||
|
||||
const commandName = content.substring(grandchild.from + 1, grandchild.to)
|
||||
if (!commandName) return
|
||||
|
||||
switch (commandName) {
|
||||
case 'thanks':
|
||||
iterateNode(nodeRef, 'other')
|
||||
return false
|
||||
}
|
||||
|
||||
if (!replacementsMap.has(commandName)) return
|
||||
|
||||
// TODO: handle accented character in braces after a CtrlSym, e.g. \'{a}
|
||||
// TODO: handle markup within words, e.g. inter\textbf{nal}formatting
|
||||
// TODO: handle commands like \egrave and \eacute
|
||||
|
||||
const text = replacementsMap.get(commandName)!
|
||||
|
||||
textNodes.push({
|
||||
from: nodeRef.from,
|
||||
to: nodeRef.to,
|
||||
text,
|
||||
context: currentContext,
|
||||
})
|
||||
|
||||
return false
|
||||
},
|
||||
BeginEnv(nodeRef) {
|
||||
const envName = content
|
||||
?.substring(nodeRef.from + '\\begin{'.length, nodeRef.to - 1)
|
||||
.replace(/\*$/, '')
|
||||
$Environment(nodeRef) {
|
||||
const envNameNode = nodeRef.node
|
||||
.getChild('BeginEnv')
|
||||
?.getChild('EnvNameGroup')
|
||||
?.getChild('EnvName')
|
||||
|
||||
if (envName === 'abstract') {
|
||||
data.headers++
|
||||
iterateNode(nodeRef, 'abstract')
|
||||
return false
|
||||
if (envNameNode) {
|
||||
const envName = content
|
||||
?.substring(envNameNode.from, envNameNode.to)
|
||||
.replace(/\*$/, '')
|
||||
|
||||
if (envName === 'abstract') {
|
||||
data.headers++
|
||||
|
||||
const contentNode = nodeRef.node.getChild('Content')
|
||||
if (contentNode) {
|
||||
iterateNode(contentNode, 'abstract')
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
}
|
||||
},
|
||||
BeginEnv() {
|
||||
return false // ignore text in \begin arguments
|
||||
},
|
||||
Math(nodeRef) {
|
||||
const parent = nodeRef.node.parent
|
||||
if (parent?.type.is('InlineMath') || parent?.type.is('ParenMath')) {
|
||||
data.mathInline++
|
||||
} else {
|
||||
data.mathDisplay++
|
||||
}
|
||||
|
||||
return false // TODO: count \text in math nodes?
|
||||
},
|
||||
'ShortTextArgument ShortOptionalArg'() {
|
||||
return false
|
||||
},
|
||||
@@ -177,12 +235,6 @@ export const countWordsInFile = (
|
||||
iterateNode(nodeRef, 'header')
|
||||
return false
|
||||
},
|
||||
'DisplayMath BracketMath'() {
|
||||
data.mathDisplay++
|
||||
},
|
||||
'InlineMath ParenMath'() {
|
||||
data.mathInline++
|
||||
},
|
||||
Caption(nodeRef) {
|
||||
iterateNode(nodeRef, 'caption')
|
||||
return false
|
||||
@@ -201,6 +253,14 @@ export const countWordsInFile = (
|
||||
countWordsInFile(data, projectSnapshot, path, segmenters)
|
||||
}
|
||||
},
|
||||
'BlankLine LineBreak'(nodeRef) {
|
||||
textNodes.push({
|
||||
from: nodeRef.from,
|
||||
to: nodeRef.to,
|
||||
text: '\n',
|
||||
context: currentContext,
|
||||
})
|
||||
},
|
||||
})
|
||||
|
||||
const preambleExtent = findPreambleExtent(tree)
|
||||
@@ -226,6 +286,7 @@ export const countWordsInFile = (
|
||||
caption: '',
|
||||
text: '',
|
||||
footnote: '',
|
||||
other: '',
|
||||
}
|
||||
|
||||
let pos = 0
|
||||
@@ -240,12 +301,18 @@ export const countWordsInFile = (
|
||||
for (const [context, text] of Object.entries(texts)) {
|
||||
const counter = counters[context as Context]
|
||||
|
||||
for (const value of segmenters.word.segment(text)) {
|
||||
// TODO: replace - and _ with a word character if hyphenated words should be counted as one word?
|
||||
|
||||
for (const value of segmenters.word.segment(
|
||||
text.replace(/\w[-_]\w/g, 'aaa')
|
||||
)) {
|
||||
if (value.isWordLike) {
|
||||
data[counter.word]++
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: count hyphens as characters?
|
||||
|
||||
for (const value of segmenters.character.segment(text)) {
|
||||
// TODO: option for whether to include whitespace?
|
||||
if (!whiteSpaceRe.test(value.segment)) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
const wordRe = /['\-.\p{L}]+/gu
|
||||
const wordLikeRe = /\p{L}/gu // must contain at least one "letter" to be a word
|
||||
const wordLikeRe = /\p{L}/u // must contain at least one "letter" to be a word
|
||||
const characterRe = /\S/gu
|
||||
|
||||
type SegmentDataLike = {
|
||||
|
||||
@@ -274,6 +274,7 @@
|
||||
"cant_see_what_youre_looking_for_question": "Can’t see what you’re looking for?",
|
||||
"caption_above": "Caption above",
|
||||
"caption_below": "Caption below",
|
||||
"captions": "Captions",
|
||||
"card_details": "Card details",
|
||||
"card_details_are_not_valid": "Card details are not valid",
|
||||
"card_must_be_authenticated_by_3dsecure": "Your card must be authenticated with 3D Secure before continuing",
|
||||
@@ -769,6 +770,7 @@
|
||||
"footer_contact_us": "Contact us",
|
||||
"footer_navigation": "Footer navigation",
|
||||
"footer_plans_and_pricing": "Plans & pricing",
|
||||
"footnotes": "Footnotes",
|
||||
"for_business": "For business",
|
||||
"for_enterprise": "For enterprise",
|
||||
"for_government": "For government",
|
||||
@@ -2234,6 +2236,7 @@
|
||||
"test_configuration_successful": "Test configuration successful",
|
||||
"tex_live_version": "TeX Live version",
|
||||
"texgpt": "TexGPT",
|
||||
"text": "Text",
|
||||
"thank_you": "Thank you!",
|
||||
"thank_you_email_confirmed": "Thank you, your email is now confirmed",
|
||||
"thank_you_exclamation": "Thank you!",
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
import { readFile } from 'node:fs/promises'
|
||||
import path from 'node:path'
|
||||
import { countWordsInFile } from '@/features/word-count-modal/utils/count-words-in-file'
|
||||
import { WordCountData } from '@/features/word-count-modal/components/word-count-data'
|
||||
import { createSegmenters } from '@/features/word-count-modal/utils/segmenters'
|
||||
import { expect } from 'chai'
|
||||
|
||||
/**
 * Spec for `countWordsInFile`: runs the counter over the `word-count.tex`
 * fixture that sits next to this file and checks the per-category word and
 * character totals.
 */
describe('word count', function () {
  beforeEach(async function () {
    // Fresh zeroed counters for every test: countWordsInFile increments the
    // fields of the object it is given.
    this.data = {
      encode: '',
      textWords: 0,
      headWords: 0,
      outside: 0,
      headers: 0,
      elements: 0,
      mathInline: 0,
      mathDisplay: 0,
      errors: 0,
      messages: '',
      textCharacters: 0,
      headCharacters: 0,
      captionWords: 0,
      captionCharacters: 0,
      footnoteWords: 0,
      footnoteCharacters: 0,
      abstractWords: 0,
      abstractCharacters: 0,
      otherWords: 0,
      otherCharacters: 0,
    } satisfies WordCountData

    // Document path -> file contents for the fake snapshot below.
    const content: Record<string, string> = {
      'word-count.tex': await readFile(
        path.join(__dirname, 'word-count.tex'),
        'utf-8'
      ),
    }

    // Minimal stand-in for the project snapshot. It follows the
    // `getDocContents(path: string): string | null` shape that
    // countWordsInFile accepts, returning null for unknown documents.
    // The parameter is named `docPath` so it does not shadow the imported
    // `path` module.
    this.projectSnapshot = {
      getDocContents(docPath: string): string | null {
        return content[docPath] ?? null
      },
    }

    this.segmenters = createSegmenters('en_US')
  })

  it('produces correct counts', function () {
    countWordsInFile(
      this.data,
      this.projectSnapshot,
      'word-count.tex',
      this.segmenters
    )

    // Expected values correspond to the `% N` annotations in the fixture.
    expect(this.data).to.deep.include({
      abstractCharacters: 8,
      abstractWords: 2,
      captionCharacters: 16,
      captionWords: 4,
      footnoteCharacters: 8,
      footnoteWords: 2,
      headCharacters: 296,
      headWords: 52,
      otherCharacters: 10,
      otherWords: 2,
      textCharacters: 193,
      textWords: 42,
    })
  })
})
|
||||
@@ -0,0 +1,118 @@
|
||||
\documentclass{article}
|
||||
\usepackage{graphicx}
|
||||
\usepackage{amsmath}
|
||||
|
||||
\title{The Title} % 2 in headers
|
||||
\author{An Author} % 0
|
||||
\date{May 2025} % 0
|
||||
|
||||
\begin{document}
|
||||
|
||||
\maketitle % 0
|
||||
|
||||
\thanks{bleep bloop}
|
||||
|
||||
\begin{abstract}
|
||||
Word word % 2 in abstract
|
||||
\end{abstract}
|
||||
|
||||
\section{plain text}
|
||||
Word word % 2
|
||||
|
||||
\section{accents}
|
||||
w\'ard w\`erd w\"ird w\~ord w\.urd w\^ord % 6
|
||||
|
||||
\section{accents with groups}
|
||||
% w\'{a}rd w\`{e}rd w\"{i}rd w\~{o}rd w\.{u}rd w\^{o}rd w\c{o}rd w\u{o}rd % 8 % TODO
|
||||
|
||||
\section{grouped single character command}
|
||||
% w{\o}rd w\~{\o}rd % 2 % TODO
|
||||
|
||||
\section{commands that create characters}
|
||||
\o\oe\aa\AE % 1
|
||||
|
||||
\subsection{with braces}
|
||||
w\o{}rd w\oe{}rd w\aa{}rd w\AE{}rd % 4
|
||||
|
||||
\subsection{with spaces}
|
||||
w\o rd w\oe rd w\ae rd w\AE rd % 4
|
||||
|
||||
\section{symbols}
|
||||
\S{} \P{} % 0
|
||||
|
||||
\section{formatting}
|
||||
\textit{italic} \textbf{bold} \textit{\textbf{bold italic}} % 4
|
||||
\texttt{teletype} \textsf{sans-serif} \textsc{small caps} % 4
|
||||
\textsf{-} % 0
|
||||
|
||||
\section{formatting inside word}
|
||||
% wo\textit{italic}rd %1 % TODO
|
||||
|
||||
\section{commands that create text}
|
||||
\textbackslash{}word \LaTeX{} % 2
|
||||
|
||||
\section{special characters}
|
||||
word\&word \$word word\% \#word wo\_rd \{word\} % 7
|
||||
|
||||
\section{footnote}
|
||||
\footnote{word word} % 2 in footnote
|
||||
|
||||
\section{headers}
|
||||
\part{word} % 1
|
||||
\chapter{word} % 1
|
||||
\section{word} % 1
|
||||
\subsection{word} % 1
|
||||
\subsubsection{word} % 1
|
||||
\paragraph{word} % 1
|
||||
|
||||
\section{verbatim}
|
||||
\verb|word word word| % 0
|
||||
|
||||
\section{list}
|
||||
\begin{itemize}
|
||||
\item word % 1
|
||||
\item word % 1
|
||||
\end{itemize}
|
||||
|
||||
\section{figure}
|
||||
\begin{figure}
|
||||
\includegraphics[width=0.5\linewidth]{example.png} %0
|
||||
\caption{Word word} %2 in captions
|
||||
\end{figure}
|
||||
|
||||
\section{table}
|
||||
\begin{table}
|
||||
\begin{tabular}{c|c}
|
||||
word & word \\ % 2
|
||||
word & word % 2
|
||||
\end{tabular}
|
||||
\caption{Word word} % 2 in captions
|
||||
\end{table}
|
||||
|
||||
\section{line break}
|
||||
word\\word % 2
|
||||
|
||||
\section{inline math}
|
||||
$2+3=5$ % 0
|
||||
|
||||
\section{display math}
|
||||
\[2+3=5\]
|
||||
|
||||
\begin{equation}
|
||||
2+3=5
|
||||
\end{equation}
|
||||
|
||||
\begin{equation*}
|
||||
2+3=5
|
||||
\end{equation*}
|
||||
|
||||
\begin{align*}
|
||||
2x - 5y &= 8 \\
|
||||
3x + 9y &= -12
|
||||
\end{align*}
|
||||
|
||||
\section{text in math}
|
||||
|
||||
$ 2+3 \text{ is equal to } 5 $
|
||||
|
||||
\end{document}
|
||||
Reference in New Issue
Block a user