From 25027e0155dfb0e26e267bd592a69ca976c91876 Mon Sep 17 00:00:00 2001 From: Alf Eaton Date: Wed, 22 Jan 2025 09:37:41 +0000 Subject: [PATCH] Use MEMFS in Hunspell worker (#22998) GitOrigin-RevId: ef60b2c333484499e6289c51eacab04b983e1090 --- .../source-editor/hunspell/compile.sh | 3 +- .../source-editor/hunspell/hunspell.worker.ts | 19 +- .../source-editor/hunspell/wasm/hunspell.d.ts | 13 +- .../source-editor/hunspell/wasm/hunspell.mjs | 164 +----------------- 4 files changed, 16 insertions(+), 183 deletions(-) diff --git a/services/web/frontend/js/features/source-editor/hunspell/compile.sh b/services/web/frontend/js/features/source-editor/hunspell/compile.sh index 889c76cea2..57dea4c0fd 100755 --- a/services/web/frontend/js/features/source-editor/hunspell/compile.sh +++ b/services/web/frontend/js/features/source-editor/hunspell/compile.sh @@ -12,11 +12,10 @@ emmake make em++ \ -s EXPORTED_FUNCTIONS="['_Hunspell_create', '_Hunspell_destroy', '_Hunspell_spell', '_Hunspell_suggest', '_Hunspell_free_list', '_Hunspell_add_dic', '_Hunspell_add', '_Hunspell_remove', '_free', '_malloc', 'FS']" \ - -s EXPORTED_RUNTIME_METHODS="['ccall', 'cwrap', 'getValue', 'stringToNewUTF8', 'UTF8ToString', 'WORKERFS']" \ + -s EXPORTED_RUNTIME_METHODS="['ccall', 'cwrap', 'getValue', 'stringToNewUTF8', 'UTF8ToString', 'MEMFS']" \ -s ENVIRONMENT=worker \ -s STACK_SIZE=5MB \ -s ALLOW_MEMORY_GROWTH \ - -lworkerfs.js \ -O2 \ -g2 \ src/hunspell/.libs/libhunspell-1.7.a \ diff --git a/services/web/frontend/js/features/source-editor/hunspell/hunspell.worker.ts b/services/web/frontend/js/features/source-editor/hunspell/hunspell.worker.ts index b056434b37..5af156d38f 100644 --- a/services/web/frontend/js/features/source-editor/hunspell/hunspell.worker.ts +++ b/services/web/frontend/js/features/source-editor/hunspell/hunspell.worker.ts @@ -33,7 +33,7 @@ const createSpellChecker = async ({ const { cwrap, FS, - WORKERFS, + MEMFS, stringToNewUTF8, _malloc, _free, @@ -66,23 +66,16 @@ const createSpellChecker = async ({ const [dic, aff] = await Promise.all([ fetch(new URL(`./${lang}.dic`, dictionariesRootURL)).then(response => - response.blob() + response.arrayBuffer() ), fetch(new URL(`./${lang}.aff`, dictionariesRootURL)).then(response => - response.blob() + response.arrayBuffer() ), ]) - FS.mount( - WORKERFS, - { - blobs: [ - { name: 'index.dic', data: dic }, - { name: 'index.aff', data: aff }, - ], - }, - '/dictionaries' - ) + FS.mount(MEMFS, {}, '/dictionaries') + FS.writeFile('/dictionaries/index.dic', new Uint8Array(dic)) + FS.writeFile('/dictionaries/index.aff', new Uint8Array(aff)) const dicPtr = stringToNewUTF8('/dictionaries/index.dic') const affPtr = stringToNewUTF8('/dictionaries/index.aff') diff --git a/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.d.ts b/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.d.ts index 6b15ba55c5..2f3973ac81 100644 --- a/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.d.ts +++ b/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.d.ts @@ -50,14 +50,15 @@ declare class Hunspell { getValue(ptr: number, type: string): number FS: { mkdir(path: string): void - mount( - type: any, - data: { blobs: Record<{ name: string; data: BlobPart }>[] }, - dir: string - ): void + mount(type: any, opts: Record, dir: string): void + writeFile( + path: string, + data: string | ArrayBufferView, + opts?: { flags?: string } + ) } - WORKERFS: any + MEMFS: any } declare const factory = async (options?: Record) => diff --git a/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.mjs b/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.mjs index ec6cf66d7c..65fde1c543 100644 --- a/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.mjs +++ b/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.mjs @@ -154,19 +154,6 @@ var wasmMemory; // set in exit() and abort() var ABORT = false; -// In STRICT mode, we only define assert() when ASSERTIONS is set. i.e. we -// don't define it at all in release modes. This matches the behaviour of -// MINIMAL_RUNTIME. -// TODO(sbc): Make this the default even without STRICT enabled. -/** @type {function(*, string=)} */ function assert(condition, text) { - if (!condition) { - // This build was created without ASSERTIONS defined. `assert()` should not - // ever be called in this configuration but in case there are callers in - // the wild leave this simple abort() implementation here for now. - abort(text); - } -} - // Memory management var /** @type {!Int8Array} */ HEAP8, /** @type {!Uint8Array} */ HEAPU8, /** @type {!Int16Array} */ HEAP16, /** @type {!Uint16Array} */ HEAPU16, /** @type {!Int32Array} */ HEAP32, /** @type {!Uint32Array} */ HEAPU32, /** @type {!Float32Array} */ HEAPF32, /** @type {!Float64Array} */ HEAPF64; @@ -1435,152 +1422,6 @@ var FS_getMode = (canRead, canWrite) => { return mode; }; -var WORKERFS = { - DIR_MODE: 16895, - FILE_MODE: 33279, - reader: null, - mount(mount) { - assert(ENVIRONMENT_IS_WORKER); - if (!WORKERFS.reader) WORKERFS.reader = new FileReaderSync; - var root = WORKERFS.createNode(null, "/", WORKERFS.DIR_MODE, 0); - var createdParents = {}; - function ensureParent(path) { - // return the parent node, creating subdirs as necessary - var parts = path.split("/"); - var parent = root; - for (var i = 0; i < parts.length - 1; i++) { - var curr = parts.slice(0, i + 1).join("/"); - // Issue 4254: Using curr as a node name will prevent the node - // from being found in FS.nameTable when FS.open is called on - // a path which holds a child of this node, - // given that all FS functions assume node names - // are just their corresponding parts within their given path, - // rather than incremental aggregates which include their parent's - // directories. - createdParents[curr] ||= WORKERFS.createNode(parent, parts[i], WORKERFS.DIR_MODE, 0); - parent = createdParents[curr]; - } - return parent; - } - function base(path) { - var parts = path.split("/"); - return parts[parts.length - 1]; - } - // We also accept FileList here, by using Array.prototype - Array.prototype.forEach.call(mount.opts["files"] || [], function(file) { - WORKERFS.createNode(ensureParent(file.name), base(file.name), WORKERFS.FILE_MODE, 0, file, file.lastModifiedDate); - }); - (mount.opts["blobs"] || []).forEach(obj => { - WORKERFS.createNode(ensureParent(obj["name"]), base(obj["name"]), WORKERFS.FILE_MODE, 0, obj["data"]); - }); - (mount.opts["packages"] || []).forEach(pack => { - pack["metadata"].files.forEach(file => { - var name = file.filename.substr(1); - // remove initial slash - WORKERFS.createNode(ensureParent(name), base(name), WORKERFS.FILE_MODE, 0, pack["blob"].slice(file.start, file.end)); - }); - }); - return root; - }, - createNode(parent, name, mode, dev, contents, mtime) { - var node = FS.createNode(parent, name, mode); - node.mode = mode; - node.node_ops = WORKERFS.node_ops; - node.stream_ops = WORKERFS.stream_ops; - node.timestamp = (mtime || new Date).getTime(); - assert(WORKERFS.FILE_MODE !== WORKERFS.DIR_MODE); - if (mode === WORKERFS.FILE_MODE) { - node.size = contents.size; - node.contents = contents; - } else { - node.size = 4096; - node.contents = {}; - } - if (parent) { - parent.contents[name] = node; - } - return node; - }, - node_ops: { - getattr(node) { - return { - dev: 1, - ino: node.id, - mode: node.mode, - nlink: 1, - uid: 0, - gid: 0, - rdev: 0, - size: node.size, - atime: new Date(node.timestamp), - mtime: new Date(node.timestamp), - ctime: new Date(node.timestamp), - blksize: 4096, - blocks: Math.ceil(node.size / 4096) - }; - }, - setattr(node, attr) { - if (attr.mode !== undefined) { - node.mode = attr.mode; - } - if (attr.timestamp !== undefined) { - node.timestamp = attr.timestamp; - } - }, - lookup(parent, name) { - throw new FS.ErrnoError(44); - }, - mknod(parent, name, mode, dev) { - throw new FS.ErrnoError(63); - }, - rename(oldNode, newDir, newName) { - throw new FS.ErrnoError(63); - }, - unlink(parent, name) { - throw new FS.ErrnoError(63); - }, - rmdir(parent, name) { - throw new FS.ErrnoError(63); - }, - readdir(node) { - var entries = [ ".", ".." ]; - for (var key of Object.keys(node.contents)) { - entries.push(key); - } - return entries; - }, - symlink(parent, newName, oldPath) { - throw new FS.ErrnoError(63); - } - }, - stream_ops: { - read(stream, buffer, offset, length, position) { - if (position >= stream.node.size) return 0; - var chunk = stream.node.contents.slice(position, position + length); - var ab = WORKERFS.reader.readAsArrayBuffer(chunk); - buffer.set(new Uint8Array(ab), offset); - return chunk.size; - }, - write(stream, buffer, offset, length, position) { - throw new FS.ErrnoError(29); - }, - llseek(stream, offset, whence) { - var position = offset; - if (whence === 1) { - position += stream.position; - } else if (whence === 2) { - if (FS.isFile(stream.node.mode)) { - position += stream.node.size; - } - } - if (position < 0) { - throw new FS.ErrnoError(28); - } - return position; - } - } -}; - var FS = { root: null, mounts: [], @@ -2796,8 +2637,7 @@ var FS = { FS.createDefaultDevices(); FS.createSpecialDirectories(); FS.filesystems = { - "MEMFS": MEMFS, - "WORKERFS": WORKERFS + "MEMFS": MEMFS }; }, init(input, output, error) { @@ -3872,7 +3712,7 @@ Module["UTF8ToString"] = UTF8ToString; Module["stringToNewUTF8"] = stringToNewUTF8; -Module["WORKERFS"] = WORKERFS; +Module["MEMFS"] = MEMFS; var calledRun;