Merge pull request #24468 from overleaf/mj-client-side-references

[web] Perform ARS on client-side

GitOrigin-RevId: 19703c82758cae450fe52463ad9612d3a2383ba0
This commit is contained in:
Mathias Jakobsen
2025-09-02 10:41:41 +01:00
committed by Copybot
parent f6820ed794
commit ed0c4c447e
21 changed files with 3040 additions and 26 deletions

View File

@@ -380,6 +380,7 @@ const _ProjectController = {
'word-count-client',
'editor-popup-ux-survey',
'new-editor-error-logs-redesign',
'client-side-references',
].filter(Boolean)
const getUserValues = async userId =>

View File

@@ -1032,6 +1032,7 @@ module.exports = {
integrationPanelComponents: [],
referenceSearchSetting: [],
errorLogsComponents: [],
referenceIndices: [],
},
moduleImportSequence: [

View File

@@ -46,6 +46,12 @@ const buildConfig = () => {
'../../frontend/js/features/source-editor/hunspell/hunspell.worker'
)
// add entrypoint under '/' for references worker
addWorker(
'references-worker',
'../../frontend/js/features/ide-react/references/references.worker.ts'
)
// add entrypoints under '/' for pdfjs workers
addWorker('pdfjs-dist', 'pdfjs-dist/build/pdf.worker.mjs')

View File

@@ -87,13 +87,13 @@ export const ReactContextRoot: FC<
<Providers.EditorProvider>
<Providers.FileTreeDataProvider>
<Providers.FileTreePathProvider>
<Providers.ReferencesProvider>
<Providers.UserFeaturesProvider>
<Providers.PermissionsProvider>
<Providers.RailProvider>
<Providers.LayoutProvider>
<Providers.ProjectSettingsProvider>
<Providers.EditorManagerProvider>
<Providers.UserFeaturesProvider>
<Providers.PermissionsProvider>
<Providers.RailProvider>
<Providers.LayoutProvider>
<Providers.ProjectSettingsProvider>
<Providers.EditorManagerProvider>
<Providers.ReferencesProvider>
<Providers.LocalCompileProvider>
<Providers.DetachCompileProvider>
<Providers.ChatProvider>
@@ -113,13 +113,13 @@ export const ReactContextRoot: FC<
</Providers.ChatProvider>
</Providers.DetachCompileProvider>
</Providers.LocalCompileProvider>
</Providers.EditorManagerProvider>
</Providers.ProjectSettingsProvider>
</Providers.LayoutProvider>
</Providers.RailProvider>
</Providers.PermissionsProvider>
</Providers.UserFeaturesProvider>
</Providers.ReferencesProvider>
</Providers.ReferencesProvider>
</Providers.EditorManagerProvider>
</Providers.ProjectSettingsProvider>
</Providers.LayoutProvider>
</Providers.RailProvider>
</Providers.PermissionsProvider>
</Providers.UserFeaturesProvider>
</Providers.FileTreePathProvider>
</Providers.FileTreeDataProvider>
</Providers.EditorProvider>

View File

@@ -7,21 +7,31 @@ import {
useCallback,
useMemo,
useState,
useRef,
} from 'react'
import { useIdeReactContext } from '@/features/ide-react/context/ide-react-context'
import { useConnectionContext } from '@/features/ide-react/context/connection-context'
import { postJSON } from '@/infrastructure/fetch-json'
import { ShareJsDoc } from '@/features/ide-react/editor/share-js-doc'
import { useFileTreeData } from '@/shared/context/file-tree-data-context'
import { findDocEntityById } from '@/features/ide-react/util/find-doc-entity-by-id'
import { IdeEvents } from '@/features/ide-react/create-ide-event-emitter'
import { debugConsole } from '@/utils/debugging'
import useEventListener from '@/shared/hooks/use-event-listener'
import { useProjectContext } from '@/shared/context/project-context'
import { useEditorManagerContext } from './editor-manager-context'
import { signalWithTimeout } from '@/utils/abort-signal'
import { postJSON } from '@/infrastructure/fetch-json'
import { debugConsole } from '@/utils/debugging'
import { useFeatureFlag } from '@/shared/context/split-test-context'
import type { ReferenceIndexer } from '../references/reference-indexer'
import { AdvancedReferenceSearchResult } from '@/features/ide-react/references/types'
export const ReferencesContext = createContext<
| {
referenceKeys: Set<string>
indexAllReferences: (shouldBroadcast: boolean) => Promise<void>
searchLocalReferences: (
query: string
) => Promise<AdvancedReferenceSearchResult>
}
| undefined
>(undefined)
@@ -32,14 +42,18 @@ export const ReferencesProvider: FC<React.PropsWithChildren> = ({
const { fileTreeData } = useFileTreeData()
const { eventEmitter, projectId } = useIdeReactContext()
const { socket } = useConnectionContext()
const { projectSnapshot } = useProjectContext()
const { openDocs } = useEditorManagerContext()
const abortControllerRef = useRef<AbortController | null>(null)
const [referenceKeys, setReferenceKeys] = useState(new Set<string>())
const clientSideReferences = useFeatureFlag('client-side-references')
const [existingIndexHash, setExistingIndexHash] = useState<
Record<string, { hash: string; timestamp: number }>
>({})
const indexAllReferences = useCallback(
const indexAllReferencesServerside = useCallback(
async (shouldBroadcast: boolean) => {
return postJSON(`/project/${projectId}/references/indexAll`, {
body: {
@@ -57,6 +71,54 @@ export const ReferencesProvider: FC<React.PropsWithChildren> = ({
[projectId]
)
const indexerRef = useRef<Promise<ReferenceIndexer> | null>(null)
if (clientSideReferences && indexerRef.current === null) {
indexerRef.current = import('../references/reference-indexer').then(
m => new m.ReferenceIndexer()
)
}
const indexAllReferencesLocally = useCallback(
async (shouldBroadcast: boolean) => {
abortControllerRef.current?.abort()
if (!indexerRef.current) {
return
}
abortControllerRef.current = new AbortController()
const signal = abortControllerRef.current.signal
await openDocs.awaitBufferedOps(signalWithTimeout(signal, 5000))
await projectSnapshot.refresh()
if (signal.aborted) {
return
}
const indexer = await indexerRef.current
const keys = await indexer.updateFromSnapshot(projectSnapshot, { signal })
if (signal.aborted) {
return
}
setReferenceKeys(keys)
if (shouldBroadcast) {
// Inform other clients about change in keys
await postJSON(`/project/${projectId}/references/indexAll`, {
body: { shouldBroadcast: true },
}).catch(error => {
// allow the request to fail
debugConsole.error(error)
})
}
},
[projectSnapshot, openDocs, projectId]
)
const indexAllReferences = clientSideReferences
? indexAllReferencesLocally
: indexAllReferencesServerside
const indexReferencesIfDocModified = useCallback(
(doc: ShareJsDoc, shouldBroadcast: boolean) => {
// avoid reindexing references if the bib file has not changed since the
@@ -115,9 +177,13 @@ export const ReferencesProvider: FC<React.PropsWithChildren> = ({
// We only need to grab the references when the editor first loads,
// not on every reconnect
socket.on('references:keys:updated', (keys, allDocs) => {
setReferenceKeys(oldKeys =>
allDocs ? new Set(keys) : new Set([...oldKeys, ...keys])
)
if (clientSideReferences) {
indexAllReferences(false)
} else {
setReferenceKeys(oldDocs =>
allDocs ? new Set(keys) : new Set([...oldDocs, ...keys])
)
}
})
indexAllReferences(false)
}
@@ -127,14 +193,26 @@ export const ReferencesProvider: FC<React.PropsWithChildren> = ({
return () => {
eventEmitter.off('project:joined', handleProjectJoined)
}
}, [eventEmitter, indexAllReferences, socket])
}, [eventEmitter, indexAllReferences, socket, clientSideReferences])
const searchLocalReferences = useCallback(
async (query: string): Promise<AdvancedReferenceSearchResult> => {
if (!indexerRef.current) {
return { hits: [] }
}
const indexer = await indexerRef.current
return await indexer.search(query)
},
[]
)
const value = useMemo(
() => ({
referenceKeys,
indexAllReferences,
searchLocalReferences,
}),
[indexAllReferences, referenceKeys]
[indexAllReferences, referenceKeys, searchLocalReferences]
)
return (

View File

@@ -0,0 +1,24 @@
import { ReferenceIndex } from './reference-index'
import { Changes } from './types'
/**
 * Reference index that only records which citation keys each indexed file
 * defines. It inherits the default `search` from `ReferenceIndex`.
 */
export default class BasicReferenceIndex extends ReferenceIndex {
  /** Citation keys found in each indexed file, keyed by file path. */
  fileIndex: Map<string, Set<string>> = new Map()

  /**
   * Applies a batch of file changes and recomputes the combined key set.
   *
   * @param changes - `deletes` lists paths to drop from the index; `updates`
   *   lists files whose content is parsed and stored, replacing any previous
   *   entry for the same path.
   * @returns the union of the keys of every file currently in the index
   *   (also stored in `this.keys`).
   */
  updateIndex({ updates, deletes }: Changes): Set<string> {
    for (const path of deletes) {
      this.fileIndex.delete(path)
    }
    for (const { path, content } of updates) {
      const fileReferences = new Set<string>()
      for (const entry of this.parseEntries(content)) {
        fileReferences.add(entry.EntryKey)
      }
      this.fileIndex.set(path, fileReferences)
    }
    // Build the union with plain loops: calling `.flatMap()` on the iterator
    // returned by `Map.prototype.values()` needs the ES2025 iterator helpers,
    // which throw a TypeError on runtimes that do not ship them yet.
    const allKeys = new Set<string>()
    for (const fileReferences of this.fileIndex.values()) {
      for (const key of fileReferences) {
        allKeys.add(key)
      }
    }
    this.keys = allKeys
    return this.keys
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,43 @@
import Bib2Json from './bib2json'
import { AdvancedReferenceSearchResult, Bib2JsonEntry, Changes } from './types'
/**
 * Base class for reference indices. Subclasses decide how file changes are
 * stored (`updateIndex`) and may override `search`; parsing of bib content is
 * shared through `parseEntries`.
 */
export abstract class ReferenceIndex {
  /** Citation keys currently known to the index. */
  keys: Set<string> = new Set()

  /** Applies added/changed (`updates`) and removed (`deletes`) files. */
  abstract updateIndex({ updates, deletes }: Changes): void

  /** Default search implementation: always resolves with no hits. */
  async search(_query: string): Promise<AdvancedReferenceSearchResult> {
    return { hits: [] }
  }

  /** Returns the current set of citation keys. */
  getKeys(): Set<string> {
    return this.keys
  }

  /**
   * Parses bib content into entries, keeping only the fields listed below.
   * The `year` field is normalised to its integer form (or removed when it is
   * not numeric), and every entry ends up with all required fields present,
   * defaulting to empty strings.
   *
   * @param content - raw text of a bib file
   * @returns the parsed entries
   */
  parseEntries(content: string): Bib2JsonEntry[] {
    const allowedFields = ['author', 'journal', 'title', 'year', 'date']
    // @ts-expect-error Bib2Json works as both a constructor and a function
    const { entries } = Bib2Json(content, allowedFields)
    for (const entry of entries) {
      // The year lookup below already had to tolerate a missing `Fields`
      // object; create it so that setDefaultFields never dereferences
      // `undefined`.
      if (entry.Fields == null) {
        entry.Fields = {}
      }
      if (entry.Fields.year) {
        // Explicit radix so a value such as "0x7E7" is not read as hex.
        const year = parseInt(entry.Fields.year, 10)
        if (Number.isNaN(year)) {
          delete entry.Fields.year
        } else {
          entry.Fields.year = year.toString()
        }
      }
      setDefaultFields(entry.Fields)
    }
    return entries
  }
}
/**
 * Ensures that `author`, `journal`, `title`, `date` and `year` are all set,
 * writing an empty string for any that is missing or otherwise falsy.
 * The object is modified in place and returned.
 */
function setDefaultFields(
  fields: Partial<Bib2JsonEntry['Fields']>
): Bib2JsonEntry['Fields'] {
  const fieldNames = ['author', 'journal', 'title', 'date', 'year'] as const
  fieldNames.forEach(name => {
    if (fields[name]) {
      return
    }
    fields[name] = ''
  })
  return fields as Bib2JsonEntry['Fields']
}

View File

@@ -0,0 +1,137 @@
import { ProjectSnapshot } from '@/infrastructure/project-snapshot'
import { generateSHA1Hash } from '@/shared/utils/sha1'
import { AdvancedReferenceSearchResult, Changes } from './types'
import { debugConsole } from '@/utils/debugging'
import type { ReferenceWorkerResponse } from './references.worker'
const ONE_MB = 1024 * 1024
const MAX_BIB_DATA_SIZE = 6 * ONE_MB
/**
 * Main-thread front end for the references worker. It works out which `.bib`
 * files changed since the previous run, sends only those changes to the
 * worker, and forwards search queries to it.
 */
export class ReferenceIndexer {
  /** Hash of the content last sent to the worker, keyed by file path. */
  private fileIndexHash: Map<string, string> = new Map()
  private worker: Worker
  // Resolvers for requests the worker has not answered yet, oldest first.
  // Queues (rather than a single slot) keep overlapping calls from
  // overwriting each other, which would leave earlier promises pending
  // forever and hand a later caller the reply meant for an earlier request.
  // NOTE(review): assumes the worker answers requests of one type in the
  // order they were posted — confirm for asynchronous search implementations.
  private pendingUpdates: ((result: Set<string>) => void)[] = []
  private pendingSearches: ((result: AdvancedReferenceSearchResult) => void)[] =
    []

  constructor() {
    this.worker = new Worker(
      /* webpackChunkName: "references-worker" */
      new URL('./references.worker.ts', import.meta.url),
      { type: 'module' }
    )
    this.worker.addEventListener('message', evt => this.handleMessage(evt))
  }

  /** Resolves the oldest pending request that matches the worker's reply. */
  private handleMessage(event: MessageEvent) {
    const data = event.data as ReferenceWorkerResponse
    if (data.type === 'searchResult') {
      const resolve = this.pendingSearches.shift()
      if (resolve) {
        resolve(data.result)
        return
      }
    } else if (data.type === 'updateKeys') {
      const resolve = this.pendingUpdates.shift()
      if (resolve) {
        resolve(data.keys)
        return
      }
    }
    // Either an unrecognised message type or a reply nobody is waiting for.
    debugConsole.warn('Received unknown message from worker:', data.type)
  }

  /**
   * Diffs the `.bib` docs and binary files of `snapshot` against what was
   * indexed previously and sends the changes to the worker.
   *
   * @param snapshot - source of doc and binary file paths and contents
   * @param options - `dataLimit` caps the total amount of bib content that is
   *   read (defaults to MAX_BIB_DATA_SIZE); `signal` cancels the run between
   *   binary file fetches
   * @returns the citation keys reported by the worker, or an empty set when
   *   the run was aborted before anything was sent
   */
  async updateFromSnapshot(
    snapshot: Pick<
      ProjectSnapshot,
      | 'getDocPaths'
      | 'getDocContents'
      | 'getBinaryFilePathsWithHash'
      | 'getBinaryFileContents'
    >,
    {
      dataLimit = MAX_BIB_DATA_SIZE,
      signal,
    }: { dataLimit?: number; signal: AbortSignal }
  ): Promise<Set<string>> {
    // Work on a copy so an aborted run leaves the recorded state untouched.
    const nextFileHashIndex = new Map(this.fileIndexHash)
    // Paths still in this set after both loops no longer exist.
    const previousPaths = new Set(this.fileIndexHash.keys())
    let dataBudget = dataLimit
    const changes: Changes = { updates: [], deletes: [] }

    const docs = snapshot
      .getDocPaths()
      .filter(path => path.toLowerCase().endsWith('.bib'))
    for (const path of docs) {
      previousPaths.delete(path)
      if (dataBudget <= 0) {
        continue
      }
      // Truncate to the remaining budget before hashing and indexing.
      const content = snapshot.getDocContents(path)?.slice(0, dataBudget)
      if (content == null) {
        continue
      }
      dataBudget -= content.length
      const hash = generateSHA1Hash(content)
      if (nextFileHashIndex.get(path) !== hash) {
        // New or changed file
        nextFileHashIndex.set(path, hash)
        changes.updates.push({ path, content })
      }
    }

    // Binary files are visited in ascending size order.
    const files = snapshot
      .getBinaryFilePathsWithHash()
      .filter(({ path }) => path.toLowerCase().endsWith('.bib'))
      .sort((a, b) => a.size - b.size)
    for (const { path, hash, size } of files) {
      if (signal.aborted) {
        debugConsole.warn('Aborted indexing references due to signal')
        return new Set()
      }
      previousPaths.delete(path)
      if (nextFileHashIndex.get(path) === hash) {
        // Already indexed; it still counts against the budget.
        dataBudget -= size
        continue
      }
      if (dataBudget <= 0) {
        continue
      }
      const content = await snapshot.getBinaryFileContents(path, {
        maxSize: dataBudget,
      })
      if (content == null) {
        // getBinaryFileContents resolves with null when the file has no hash;
        // skip it instead of failing on `content.length`.
        continue
      }
      dataBudget -= content.length
      nextFileHashIndex.set(path, hash)
      changes.updates.push({ path, content })
    }

    previousPaths.forEach(path => {
      // Deleted file
      changes.deletes.push(path)
      nextFileHashIndex.delete(path)
    })

    if (dataBudget <= 0) {
      debugConsole.warn('Data budget exceeded while updating references index')
    }

    this.fileIndexHash = nextFileHashIndex
    return new Promise(resolve => {
      // Post first: if posting throws, no resolver is left behind in the queue.
      this.worker.postMessage({
        type: 'update',
        changes,
      })
      this.pendingUpdates.push(resolve)
    })
  }

  /**
   * Sends `query` to the worker and resolves with its search result.
   */
  async search(query: string): Promise<AdvancedReferenceSearchResult> {
    return new Promise(resolve => {
      this.worker.postMessage({ type: 'search', query })
      this.pendingSearches.push(resolve)
    })
  }
}

View File

@@ -0,0 +1,47 @@
import BasicReferenceIndex from './basic-reference-index'
import { ReferenceIndex } from './reference-index'
import { AdvancedReferenceSearchResult, Changes } from './types'
import importOverleafModules from '../../../../macros/import-overleaf-module.macro'
// Shape of an index class: constructible without arguments and producing a
// ReferenceIndex.
interface IndexConstructor {
new (): ReferenceIndex
}
// Index implementations provided through the 'referenceIndices' module key;
// each item exposes the class as its default export.
const indices = importOverleafModules('referenceIndices') as {
import: { default: IndexConstructor }
path: string
}[]
// Messages this worker accepts in its 'message' listener.
export type ReferenceWorkerRequest =
| { type: 'update'; changes: Changes }
| { type: 'search'; query: string }
// Messages this worker posts back in response.
export type ReferenceWorkerResponse =
| { type: 'updateKeys'; keys: Set<string> }
| { type: 'searchResult'; result: AdvancedReferenceSearchResult }
/**
 * Instantiates the index used by this worker: the first implementation
 * supplied through `indices`, or BasicReferenceIndex when none is supplied.
 */
function createIndex(): ReferenceIndex {
  const [firstModule] = indices
  const IndexClass = firstModule?.import.default ?? BasicReferenceIndex
  return new IndexClass()
}
// Single index instance that lives as long as the worker.
const indexer: ReferenceIndex = createIndex()

// Handle requests from the main thread. Every recognised request is answered
// with exactly one message, even when the index throws: without a reply the
// promise waiting on the main thread would never settle.
self.addEventListener('message', async (event: MessageEvent) => {
  const message = event.data as ReferenceWorkerRequest
  switch (message.type) {
    case 'update':
      try {
        indexer.updateIndex(message.changes)
      } catch (error) {
        // Report whatever keys the index currently holds.
        console.error('Failed to update reference index:', error)
      }
      self.postMessage({ type: 'updateKeys', keys: indexer.getKeys() })
      break
    case 'search': {
      let result: AdvancedReferenceSearchResult = { hits: [] }
      try {
        result = await indexer.search(message.query)
      } catch (error) {
        // Fall back to an empty result.
        console.error('Failed to search reference index:', error)
      }
      self.postMessage({ type: 'searchResult', result })
      break
    }
    default:
      console.error('Unknown message type:', message)
  }
})

View File

@@ -0,0 +1,23 @@
// A parsed bib entry: its citation key plus the subset of fields kept by the
// index.
export type Bib2JsonEntry = {
EntryKey: string
Fields: {
author: string
date: string
journal: string
title: string
year: string
}
}
// Result of a reference search; each hit wraps the matching entry.
export type AdvancedReferenceSearchResult = {
hits: {
_source: Bib2JsonEntry
}[]
}
// Entries keyed by string.
// NOTE(review): presumably the key is the citation key — confirm at the use site.
export type ReferenceEntry = Map<string, Bib2JsonEntry>
// A batch of index changes: files to (re)index with their content, and paths
// of files to remove.
export type Changes = {
updates: { path: string; content: string }[]
deletes: string[]
}

View File

@@ -56,6 +56,9 @@ export const VisualPreview: FC<{ view: EditorView }> = ({ view }) => {
labels: new Set(),
packageNames: new Set(),
referenceKeys: new Set(),
searchLocalReferences() {
return Promise.resolve({ hits: [] })
},
commands: [],
fileTreeData,
})

View File

@@ -10,6 +10,7 @@ import { indentUnit, LanguageDescription } from '@codemirror/language'
import { updateHasEffect } from '../utils/effects'
import { Folder } from '../../../../../types/folder'
import { Command } from '@/features/ide-react/context/metadata-context'
import { AdvancedReferenceSearchResult } from '@/features/ide-react/references/types'
export const languageLoadedEffect = StateEffect.define()
export const hasLanguageLoadedEffect = updateHasEffect(languageLoadedEffect)
@@ -25,6 +26,9 @@ export type Metadata = {
packageNames: Set<string>
commands: Command[]
referenceKeys: Set<string>
searchLocalReferences: (
query: string
) => Promise<AdvancedReferenceSearchResult>
fileTreeData: Folder
}

View File

@@ -104,7 +104,7 @@ function useCodeMirrorScope(view: EditorView) {
const { showVisual: visual, trackChanges } = useEditorPropertiesContext()
const { referenceKeys } = useReferencesContext()
const { referenceKeys, searchLocalReferences } = useReferencesContext()
const ranges = useRangesContext()
const threads = useThreadsContext()
@@ -227,6 +227,7 @@ function useCodeMirrorScope(view: EditorView) {
const metadataRef = useRef({
...metadata,
referenceKeys,
searchLocalReferences,
fileTreeData,
})
@@ -246,6 +247,14 @@ function useCodeMirrorScope(view: EditorView) {
})
}, [view, referenceKeys])
// listen to project reference search updates
useEffect(() => {
metadataRef.current.searchLocalReferences = searchLocalReferences
window.setTimeout(() => {
view.dispatch(setMetadata(metadataRef.current))
})
}, [view, searchLocalReferences])
// listen to project root folder updates
useEffect(() => {
if (fileTreeData) {

View File

@@ -61,6 +61,30 @@ export class ProjectSnapshot {
return allPaths.filter(path => this.snapshot.getFile(path)?.isEditable())
}
/**
* Get the list of paths to binary files.
*/
getBinaryFilePathsWithHash(): { path: string; hash: string; size: number }[] {
  // Collect path, hash and byte length for every file in the snapshot that is
  // not editable; files lacking a hash or a byte length are left out.
  const binaryFiles: { path: string; hash: string; size: number }[] = []
  for (const path of this.snapshot.getFilePathnames()) {
    const file = this.snapshot.getFile(path)
    if (file == null || file.isEditable()) {
      continue
    }
    const hash = file.getHash()
    const size = file.getByteLength()
    if (hash != null && size != null) {
      binaryFiles.push({ path, hash, size })
    }
  }
  return binaryFiles
}
/**
* Get the doc content at the given path.
*/
@@ -72,6 +96,18 @@ export class ProjectSnapshot {
return file.getContent({ filterTrackedDeletes: true }) ?? null
}
async getBinaryFileContents(
  path: string,
  options?: { maxSize?: number }
): Promise<any> {
  // Look the file up by path and load its blob as text from the blob store;
  // resolves with null when the file is missing or has no hash.
  const hash = this.snapshot.getFile(path)?.getHash()
  return hash == null ? null : await this.blobStore.getString(hash, options)
}
/**
* Immediately start a refresh
*/
@@ -166,8 +202,11 @@ class SimpleBlobStore {
this.projectId = projectId
}
async getString(hash: string): Promise<string> {
return await fetchBlob(this.projectId, hash)
async getString(
hash: string,
options?: { maxSize?: number }
): Promise<string> {
return await fetchBlob(this.projectId, hash, options)
}
async getObject(hash: string) {
@@ -226,11 +265,50 @@ async function fetchLatestChanges(
}
}
async function fetchBlob(projectId: string, hash: string): Promise<string> {
/**
 * Fetches a project blob as text.
 *
 * When `options.maxSize` is a non-zero number the download is delegated to
 * fetchTextFileWithSizeLimit; otherwise the whole body is read, and a
 * FetchError is thrown for a non-OK response.
 */
async function fetchBlob(
  projectId: string,
  hash: string,
  options?: { maxSize?: number }
): Promise<string> {
  const url = `/project/${projectId}/blob/${hash}`
  const maxSize = options?.maxSize
  if (!maxSize) {
    const res = await fetch(url)
    if (res.ok) {
      return await res.text()
    }
    throw new FetchError('Failed to fetch blob', url, undefined, res)
  }
  return await fetchTextFileWithSizeLimit(url, maxSize)
}
/**
 * Downloads `url` as text but stops once more than `maxSize` characters have
 * been received.
 *
 * @param url - resource to fetch
 * @param maxSize - maximum length of the returned string
 * @returns the decoded text, truncated to at most `maxSize` characters
 * @throws Error when the response is not OK or has no body; stream errors
 *   other than an AbortError are rethrown
 */
async function fetchTextFileWithSizeLimit(url: string, maxSize: number) {
  const abortController = new AbortController()
  const response = await fetch(url, {
    signal: abortController.signal,
  })
  if (!response.ok) {
    throw new Error('Failed to fetch blob')
  }
  if (!response.body) {
    throw new Error('Response body is empty')
  }
  // Read through an explicit reader rather than `for await` over the stream:
  // async iteration of ReadableStream is not available in every browser, and
  // an explicit loop lets us stop right away instead of waiting for the
  // aborted stream to throw on the next read.
  const reader = response.body.pipeThrough(new TextDecoderStream()).getReader()
  let result = ''
  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) {
        break
      }
      result += value
      if (result.length > maxSize) {
        // Enough data: cancel the rest of the download.
        abortController.abort()
        break
      }
    }
  } catch (error: any) {
    if (error?.name === 'AbortError') {
      // This is fine, we just return the result we have so far
    } else {
      throw error
    }
  }
  return result.slice(0, maxSize)
}

View File

@@ -0,0 +1,104 @@
import BasicReferenceIndex from '@/features/ide-react/references/basic-reference-index'
import { expect } from 'chai'
// Bib entry with key `sample2023`.
const entry1 = `@article{sample2023,
author = {John Doe},
title = {Sample Title},
journal = {Sample Journal},
year = {2023},
date = {2023-01-01}
}`
// Bib entry with key `example2022`.
// NOTE(review): there is no comma after the year field — confirm whether this
// is intentional.
const entry2 = `@book{example2022,
author = {Jane Smith},
title = {Example Book},
journal = {Example Journal},
year = {2022}
date = {2022-05-15}
}`
// Bib entry with key `test2021`; the literal ends with a trailing newline.
const entry3 = `@inproceedings{test2021,
author = {Alice Johnson},
title = {Test Conference Paper},
booktitle = {Test Conference},
year = {2021},
date = {2021-10-10}
}
`
// Content of a single file holding entry1 followed by entry2.
const fileWithMultipleEntries = `${entry1}\n${entry2}`
// Update records ({ path, content }) as passed in `Changes.updates`.
const addEntry1 = { path: 'file1.bib', content: entry1 }
const addEntry2 = { path: 'file2.bib', content: entry2 }
const addEntry3 = { path: 'file3.bib', content: entry3 }
const addFileWithMultipleEntries = {
path: 'file5.bib',
content: fileWithMultipleEntries,
}
// Path as passed in `Changes.deletes`.
const deleteEntry2 = 'file2.bib'
// Unit tests for BasicReferenceIndex.updateIndex.
describe('BasicReferenceIndex', function () {
// Every test starts from a freshly constructed index.
beforeEach(function () {
this.index = new BasicReferenceIndex()
})
it('starts with an empty index', function () {
expect(this.index.fileIndex.size).to.equal(0)
expect(this.index.keys.size).to.equal(0)
})
describe('updateIndex', function () {
it('Adds entry to index and keys', function () {
const changes = { updates: [addEntry1], deletes: [] }
const keys = this.index.updateIndex(changes)
expect(this.index.fileIndex.size).to.equal(1)
expect(this.index.fileIndex.get('file1.bib')).to.deep.equal(
new Set(['sample2023'])
)
expect(keys).to.deep.equal(new Set(['sample2023']))
})
it("doesn't forget existing keys when adding new entries", function () {
const changes = { updates: [addEntry1, addEntry2], deletes: [] }
const keys = this.index.updateIndex(changes)
expect(this.index.fileIndex.size).to.equal(2)
expect(keys).to.deep.equal(new Set(['sample2023', 'example2022']))
// A second batch that only adds a file must keep the earlier keys.
const additionalChanges = { updates: [addEntry3], deletes: [] }
const updatedKeys = this.index.updateIndex(additionalChanges)
expect(this.index.fileIndex.size).to.equal(3)
expect(updatedKeys).to.deep.equal(
new Set(['sample2023', 'example2022', 'test2021'])
)
})
it('removes keys when files are deleted', function () {
const changes = {
updates: [addEntry1, addEntry2, addEntry3],
deletes: [],
}
this.index.updateIndex(changes)
expect(this.index.fileIndex.size).to.equal(3)
expect(this.index.keys).to.deep.equal(
new Set(['sample2023', 'example2022', 'test2021'])
)
// Deleting file2.bib must drop the key that only that file defined.
const deletionChanges = { updates: [], deletes: [deleteEntry2] }
const keysAfterDeletion = this.index.updateIndex(deletionChanges)
expect(this.index.fileIndex.size).to.equal(2)
expect(keysAfterDeletion).to.deep.equal(
new Set(['sample2023', 'test2021'])
)
})
it('handles multiple entries in a single file', function () {
const changes = { updates: [addFileWithMultipleEntries], deletes: [] }
const keys = this.index.updateIndex(changes)
expect(this.index.fileIndex.size).to.equal(1)
expect(this.index.fileIndex.get('file5.bib')).to.deep.equal(
new Set(['sample2023', 'example2022'])
)
expect(keys).to.deep.equal(new Set(['sample2023', 'example2022']))
})
})
})

View File

@@ -0,0 +1,83 @@
import { expect } from 'chai'
import { ReferenceIndex } from '@/features/ide-react/references/reference-index'
// Minimal concrete subclass so the inherited parseEntries can be exercised;
// updateIndex throws if it is ever called.
class TestedReferenceIndex extends ReferenceIndex {
updateIndex(): void {
throw new Error('This is a test implementation')
}
}
// Unit tests for the bib parsing shared by all reference indices.
describe('ReferenceIndex', function () {
// Every test starts from a freshly constructed index.
beforeEach(function () {
this.index = new TestedReferenceIndex()
})
describe('parseEntries', function () {
it('should parse bib entry', function () {
const content = `
@article{sample2023,
author = {John Doe},
title = {Sample Title},
journal = {Sample Journal},
year = {2023},
date = {2023-01-01}
}
`
const entries = this.index.parseEntries(content)
expect(entries).to.have.lengthOf(1)
// Besides the key and fields, the parser output carries EntryType and
// ObjectType.
expect(entries[0]).to.deep.equal({
EntryKey: 'sample2023',
EntryType: 'article',
Fields: {
author: 'John Doe',
title: 'Sample Title',
journal: 'Sample Journal',
year: '2023',
date: '2023-01-01',
},
ObjectType: 'entry',
})
})
it('should default missing fields to empty strings', function () {
const content = `@article{sample2023,
author = {John Doe},
title = {Sample Title}
}`
const entries = this.index.parseEntries(content)
expect(entries).to.have.lengthOf(1)
// journal, year and date are absent from the input and default to ''.
expect(entries[0]).to.deep.equal({
EntryKey: 'sample2023',
EntryType: 'article',
Fields: {
author: 'John Doe',
title: 'Sample Title',
journal: '',
year: '',
date: '',
},
ObjectType: 'entry',
})
})
it('should handle multiple entries', function () {
const content = `@article{sample2023,
author = {John Doe},
title = {Sample Title},
journal = {Sample Journal},
year = {2023},
date = {2023-01-01}
}
@book{example2022,
author = {Jane Smith},
title = {Example Book},
journal = {Example Journal},
year = {2022},
date = {2022-05-15}
}`
const entries = this.index.parseEntries(content)
expect(entries).to.have.lengthOf(2)
// Entries are returned in the order they appear in the input.
expect(entries[0].EntryKey).to.equal('sample2023')
expect(entries[1].EntryKey).to.equal('example2022')
})
})
})

View File

@@ -0,0 +1,326 @@
import { ReferenceIndexer } from '@/features/ide-react/references/reference-indexer'
import { generateMD5Hash } from '@/shared/utils/md5'
import sinon from 'sinon'
// Bib entry with key `sample2023`.
const entry1 = `@article{sample2023,
author = {John Doe},
title = {Sample Title},
journal = {Sample Journal},
year = {2023},
date = {2023-01-01}
}`
// Bib entry with key `example2022`.
// NOTE(review): there is no comma after the year field — confirm whether this
// is intentional.
const entry2 = `@book{example2022,
author = {Jane Smith},
title = {Example Book},
journal = {Example Journal},
year = {2022}
date = {2022-05-15}
}`
// Bib entry with key `sample2024`.
const entry3 = `@article{sample2024,
author = {John Doe},
title = {Sample Title},
journal = {Sample Journal},
year = {2024},
date = {2024-01-01}
}`
// Bib entry with key `example2025`.
// NOTE(review): there is no comma after the year field — confirm whether this
// is intentional.
const entry4 = `@book{example2025,
author = {Jane Smith},
title = {Example Book},
journal = {Example Journal},
year = {2025}
date = {2025-05-15}
}`
/**
 * Builds a stand-in for the snapshot methods read by
 * ReferenceIndexer.updateFromSnapshot. `docs` and `files` map paths to
 * content; each method is wrapped in a sinon spy so tests can assert on the
 * calls made.
 */
const snapshotWithData = ({
  docs,
  files,
}: {
  docs?: Record<string, string>
  files?: Record<string, string>
}) => {
  const listDocPaths = () => Object.keys(docs ?? {})
  const readDoc = path => docs?.[path] ?? null
  const listBinaryFiles = () => {
    const described = []
    for (const [path, content] of Object.entries(files ?? {})) {
      described.push({
        path,
        hash: generateMD5Hash(content),
        size: content.length,
      })
    }
    return described
  }
  const readBinaryFile = async path => files?.[path] ?? null
  return {
    getDocPaths: sinon.spy(listDocPaths),
    getDocContents: sinon.spy(readDoc),
    getBinaryFilePathsWithHash: sinon.spy(listBinaryFiles),
    getBinaryFileContents: sinon.spy(readBinaryFile),
  }
}
// Signal whose controller is discarded, so it is never aborted; used by tests
// that do not exercise cancellation.
const IGNORED_SIGNAL = new AbortController().signal
describe('ReferenceIndexer', function () {
it('it should index bib docs', async function () {
const referencer = new ReferenceIndexer()
const snapshot = snapshotWithData({
docs: {
'refs.bib': entry1,
'refs2.bib': entry2,
'other.tex': 'Not a bib file',
},
})
const result = await referencer.updateFromSnapshot(snapshot, {
signal: IGNORED_SIGNAL,
})
expect(snapshot.getDocPaths).to.have.been.calledOnce
expect(snapshot.getDocContents).to.have.been.calledTwice
expect(snapshot.getDocContents).to.have.been.calledWith('refs.bib')
expect(snapshot.getDocContents).to.have.been.calledWith('refs2.bib')
expect(snapshot.getDocContents).to.not.have.been.calledWith('other.tex')
expect(snapshot.getBinaryFileContents).to.not.have.been.called
expect(result).to.deep.equal(new Set(['sample2023', 'example2022']))
})
it('it should index bib binary files', async function () {
const referencer = new ReferenceIndexer()
const snapshot = snapshotWithData({
files: {
'refs.bib': entry1,
'refs2.bib': entry2,
'image.png': 'Not a bib file',
},
})
const result = await referencer.updateFromSnapshot(snapshot, {
signal: IGNORED_SIGNAL,
})
expect(snapshot.getDocPaths).to.have.been.calledOnce
expect(snapshot.getDocContents).to.not.have.been.called
expect(snapshot.getBinaryFilePathsWithHash).to.have.been.calledOnce
expect(snapshot.getBinaryFileContents).to.have.been.calledTwice
expect(snapshot.getBinaryFileContents).to.have.been.calledWith('refs.bib')
expect(snapshot.getBinaryFileContents).to.have.been.calledWith('refs2.bib')
expect(snapshot.getBinaryFileContents).to.not.have.been.calledWith(
'image.png'
)
expect(result).to.deep.equal(new Set(['sample2023', 'example2022']))
})
it('it should index both bib docs and binary files', async function () {
const referencer = new ReferenceIndexer()
const snapshot = snapshotWithData({
docs: {
'refs.bib': entry1,
'other.tex': 'Not a bib file',
},
files: {
'refs2.bib': entry2,
'image.png': 'Not a bib file',
},
})
const result = await referencer.updateFromSnapshot(snapshot, {
signal: IGNORED_SIGNAL,
})
expect(snapshot.getDocPaths).to.have.been.calledOnce
expect(snapshot.getDocContents).to.have.been.calledOnce
expect(snapshot.getDocContents).to.have.been.calledWith('refs.bib')
expect(snapshot.getDocContents).to.not.have.been.calledWith('other.tex')
expect(snapshot.getBinaryFilePathsWithHash).to.have.been.calledOnce
expect(snapshot.getBinaryFileContents).to.have.been.calledOnce
expect(snapshot.getBinaryFileContents).to.have.been.calledWith('refs2.bib')
expect(snapshot.getBinaryFileContents).to.not.have.been.calledWith(
'image.png'
)
expect(result).to.deep.equal(new Set(['sample2023', 'example2022']))
})
it('should not fetch binary files if unchanged', async function () {
const referencer = new ReferenceIndexer()
const initialSnapshot = snapshotWithData({
files: {
'refs.bib': entry1,
},
})
const initialResult = await referencer.updateFromSnapshot(initialSnapshot, {
signal: IGNORED_SIGNAL,
})
expect(initialSnapshot.getDocPaths).to.have.been.calledOnce
expect(initialSnapshot.getBinaryFilePathsWithHash).to.have.been.calledOnce
expect(initialSnapshot.getBinaryFileContents).to.have.been.calledOnceWith(
'refs.bib'
)
expect(initialResult).to.deep.equal(new Set(['sample2023']))
// Second snapshot with same files, should not fetch contents again
const secondSnapshot = snapshotWithData({
files: {
'refs.bib': entry1,
},
})
const secondResult = await referencer.updateFromSnapshot(secondSnapshot, {
signal: IGNORED_SIGNAL,
})
expect(secondSnapshot.getDocPaths).to.have.been.calledOnce
expect(secondSnapshot.getBinaryFilePathsWithHash).to.have.been.calledOnce
expect(secondSnapshot.getBinaryFileContents).to.not.have.been.called
expect(secondResult).to.deep.equal(new Set(['sample2023']))
})
it('should fetch changed binary file', async function () {
const referencer = new ReferenceIndexer()
const initialSnapshot = snapshotWithData({
files: {
'refs.bib': entry1,
},
})
const initialResult = await referencer.updateFromSnapshot(initialSnapshot, {
signal: IGNORED_SIGNAL,
})
expect(initialSnapshot.getDocPaths).to.have.been.calledOnce
expect(initialSnapshot.getBinaryFilePathsWithHash).to.have.been.calledOnce
expect(initialSnapshot.getBinaryFileContents).to.have.been.calledOnceWith(
'refs.bib'
)
expect(initialResult).to.deep.equal(new Set(['sample2023']))
// Second snapshot with a different file, should fetch contents again
const secondSnapshot = snapshotWithData({
files: {
'refs.bib': entry2,
},
})
const secondResult = await referencer.updateFromSnapshot(secondSnapshot, {
signal: IGNORED_SIGNAL,
})
expect(secondSnapshot.getDocPaths).to.have.been.calledOnce
expect(secondSnapshot.getBinaryFilePathsWithHash).to.have.been.calledOnce
expect(initialSnapshot.getBinaryFileContents).to.have.been.calledOnceWith(
'refs.bib'
)
expect(secondResult).to.deep.equal(new Set(['example2022']))
})
it('should update changed doc', async function () {
const referencer = new ReferenceIndexer()
const initialSnapshot = snapshotWithData({
docs: {
'refs.bib': entry1,
},
})
const initialResult = await referencer.updateFromSnapshot(initialSnapshot, {
signal: IGNORED_SIGNAL,
})
expect(initialResult).to.deep.equal(new Set(['sample2023']))
const secondSnapshot = snapshotWithData({
docs: {
'refs.bib': entry2,
},
})
const secondResult = await referencer.updateFromSnapshot(secondSnapshot, {
signal: IGNORED_SIGNAL,
})
expect(secondResult).to.deep.equal(new Set(['example2022']))
})
it('should notice deleted files', async function () {
  const indexer = new ReferenceIndexer()

  // First pass: two binary .bib files, both of which must be fetched.
  const snapshotWithBoth = snapshotWithData({
    files: { 'refs.bib': entry1, 'refs2.bib': entry2 },
  })
  const keysWithBoth = await indexer.updateFromSnapshot(snapshotWithBoth, {
    signal: IGNORED_SIGNAL,
  })
  expect(snapshotWithBoth.getDocPaths).to.have.been.calledOnce
  expect(snapshotWithBoth.getBinaryFilePathsWithHash).to.have.been.calledOnce
  expect(snapshotWithBoth.getBinaryFileContents).to.have.been.calledTwice
  expect(keysWithBoth).to.deep.equal(new Set(['sample2023', 'example2022']))

  // Second pass: 'refs2.bib' is gone. Its key must drop out of the index,
  // and no file contents are expected to be fetched for the remaining file.
  const snapshotWithOne = snapshotWithData({
    files: { 'refs.bib': entry1 },
  })
  const keysWithOne = await indexer.updateFromSnapshot(snapshotWithOne, {
    signal: IGNORED_SIGNAL,
  })
  expect(snapshotWithOne.getDocPaths).to.have.been.calledOnce
  expect(snapshotWithOne.getBinaryFilePathsWithHash).to.have.been.calledOnce
  expect(snapshotWithOne.getBinaryFileContents).to.not.have.been.called
  expect(keysWithOne).to.deep.equal(new Set(['sample2023']))
})
it('should notice deleted docs', async function () {
  const indexer = new ReferenceIndexer()

  // First pass: two .bib docs, each of which has its contents read once.
  const snapshotWithBoth = snapshotWithData({
    docs: { 'refs.bib': entry1, 'refs2.bib': entry2 },
  })
  const keysWithBoth = await indexer.updateFromSnapshot(snapshotWithBoth, {
    signal: IGNORED_SIGNAL,
  })
  expect(snapshotWithBoth.getDocPaths).to.have.been.calledOnce
  expect(snapshotWithBoth.getDocContents).to.have.been.calledTwice
  expect(keysWithBoth).to.deep.equal(new Set(['sample2023', 'example2022']))

  // Second pass: 'refs2.bib' is gone. Its key must drop out of the index;
  // the remaining doc is expected to be read once more.
  const snapshotWithOne = snapshotWithData({
    docs: { 'refs.bib': entry1 },
  })
  const keysWithOne = await indexer.updateFromSnapshot(snapshotWithOne, {
    signal: IGNORED_SIGNAL,
  })
  expect(snapshotWithOne.getDocPaths).to.have.been.calledOnce
  expect(snapshotWithOne.getDocContents).to.have.been.calledOnce
  expect(keysWithOne).to.deep.equal(new Set(['sample2023']))
})
it('should abort when signalled', async function () {
  // The signal is already aborted before indexing is requested, so the
  // indexer is expected to return an empty key set.
  const abortController = new AbortController()
  abortController.abort()

  const indexer = new ReferenceIndexer()
  const twoFileSnapshot = snapshotWithData({
    files: { 'refs.bib': entry1, 'refs2.bib': entry2 },
  })

  const keys = await indexer.updateFromSnapshot(twoFileSnapshot, {
    signal: abortController.signal,
  })
  expect(keys).to.deep.equal(new Set())
})
it('should respect data budget', async function () {
  // Each case pairs a `dataLimit` with the keys expected to be indexed
  // under that limit. Cases run one after another, each against a fresh
  // indexer and a fresh snapshot of the same four docs.
  const cases: Array<[number, string[]]> = [
    [1000, ['sample2023', 'example2022', 'sample2024', 'example2025']],
    [300, ['sample2023', 'example2022']],
    [200, ['sample2023']],
    [100, []],
  ]

  for (const [budget, expectedKeys] of cases) {
    const indexer = new ReferenceIndexer()
    const fourDocSnapshot = snapshotWithData({
      docs: {
        'a.bib': entry1, // 140 bytes
        'b.bib': entry2, // 140 bytes
        'c.bib': entry3, // 140 bytes
        'd.bib': entry4, // 140 bytes
      },
    })
    const keys = await indexer.updateFromSnapshot(fourDocSnapshot, {
      signal: IGNORED_SIGNAL,
      dataLimit: budget,
    })
    expect(keys).to.deep.equal(new Set(expectedKeys))
  }
})
})

View File

@@ -381,6 +381,11 @@ describe('autocomplete', { scrollBehavior: false }, function () {
value={{
referenceKeys: new Set(['ref-1', 'ref-2', 'ref-3']),
indexAllReferences: cy.stub(),
searchLocalReferences() {
return Promise.resolve({
hits: [],
})
},
}}
>
{children}

View File

@@ -47,6 +47,7 @@ import {
} from '@/shared/context/types/project-metadata'
import { UserId } from '../../../types/user'
import { ProjectCompiler } from '../../../types/project-settings'
import { ReferencesContext } from '@/features/ide-react/context/references-context'
// these constants can be imported in tests instead of
// using magic strings
@@ -243,6 +244,7 @@ export function EditorProviders({
}),
LayoutProvider: makeLayoutProvider(layoutContext),
ProjectProvider: makeProjectProvider(project),
ReferencesProvider: makeReferencesProvider(),
...providers,
}}
>
@@ -251,6 +253,27 @@ export function EditorProviders({
)
}
/**
 * Creates a stand-in ReferencesProvider for tests.
 *
 * The returned component supplies ReferencesContext with an empty set of
 * reference keys, an `indexAllReferences` that resolves immediately, and a
 * `searchLocalReferences` that always resolves with no hits.
 */
const makeReferencesProvider = () => {
  const ReferencesProvider: FC<PropsWithChildren> = ({ children }) => (
    <ReferencesContext.Provider
      value={{
        referenceKeys: new Set(),
        indexAllReferences: () => Promise.resolve(),
        searchLocalReferences: () => Promise.resolve({ hits: [] }),
      }}
    >
      {children}
    </ReferencesContext.Provider>
  )
  return ReferencesProvider
}
const makeConnectionProvider = (socket: Socket) => {
const ConnectionProvider: FC<PropsWithChildren> = ({ children }) => {
const [value] = useState(() => ({

View File

@@ -37,6 +37,13 @@ describe('ProjectSnapshot', function () {
contents: "We're done here",
hash: 'dddddddddddddddddddddddddddddddddddddddd',
},
'bibliography.bib': {
contents:
'@book{example2020,\n title={An example book},\n author={Doe, John},\n year={2020},\n publisher={Publisher}\n}\n'.repeat(
60_000
), // 6.5MB
hash: 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
},
}
const chunk = {
@@ -68,6 +75,13 @@ describe('ProjectSnapshot', function () {
byteLength: 97080,
},
},
{
pathname: 'bibliography.bib',
file: {
hash: files['bibliography.bib'].hash,
byteLength: files['bibliography.bib'].contents.length,
},
},
],
timestamp: '2025-01-01T12:00:00.000Z',
},
@@ -214,6 +228,45 @@ describe('ProjectSnapshot', function () {
)
})
})
describe('getBinaryFilePathsWithHash()', function () {
  it('returns the binary files', function () {
    // Both binary entries of the fixture are expected, each reported with
    // its path, hash and size.
    const bibliography = files['bibliography.bib']
    const expectedBinaries = [
      {
        path: 'frog.jpg',
        hash: 'cccccccccccccccccccccccccccccccccccccccc',
        size: 97080,
      },
      {
        path: 'bibliography.bib',
        hash: bibliography.hash,
        size: bibliography.contents.length,
      },
    ]

    expect(snapshot.getBinaryFilePathsWithHash()).to.deep.equal(
      expectedBinaries
    )
  })
})
describe('getBinaryFileContents', function () {
  const bibPath = 'bibliography.bib'

  beforeEach(function () {
    // Make the bibliography blob available to the snapshot's fetches.
    mockBlobs([bibPath])
  })

  it('can fetch whole file', async function () {
    const contents = await snapshot.getBinaryFileContents(bibPath)
    expect(contents).to.equal(files[bibPath].contents)
  })

  // NOTE: fetch-mock does not support the .response.body.pipeThrough API,
  // so this test is skipped for now.
  // eslint-disable-next-line mocha/no-skipped-tests
  it.skip('can fetch part of file', async function () {
    const truncated = await snapshot.getBinaryFileContents(bibPath, {
      maxSize: 100,
    })
    expect(truncated).to.equal(files[bibPath].contents.slice(0, 100))
  })
})
})
describe('concurrency', function () {