Use MEMFS in Hunspell worker (#22998)

GitOrigin-RevId: ef60b2c333484499e6289c51eacab04b983e1090
This commit is contained in:
Alf Eaton
2025-01-22 09:37:41 +00:00
committed by Copybot
parent f36963a12e
commit 8d39ed16ef
4 changed files with 16 additions and 183 deletions
@@ -12,11 +12,10 @@ emmake make
em++ \
-s EXPORTED_FUNCTIONS="['_Hunspell_create', '_Hunspell_destroy', '_Hunspell_spell', '_Hunspell_suggest', '_Hunspell_free_list', '_Hunspell_add_dic', '_Hunspell_add', '_Hunspell_remove', '_free', '_malloc', 'FS']" \
-s EXPORTED_RUNTIME_METHODS="['ccall', 'cwrap', 'getValue', 'stringToNewUTF8', 'UTF8ToString', 'WORKERFS']" \
-s EXPORTED_RUNTIME_METHODS="['ccall', 'cwrap', 'getValue', 'stringToNewUTF8', 'UTF8ToString', 'MEMFS']" \
-s ENVIRONMENT=worker \
-s STACK_SIZE=5MB \
-s ALLOW_MEMORY_GROWTH \
-lworkerfs.js \
-O2 \
-g2 \
src/hunspell/.libs/libhunspell-1.7.a \
@@ -33,7 +33,7 @@ const createSpellChecker = async ({
const {
cwrap,
FS,
WORKERFS,
MEMFS,
stringToNewUTF8,
_malloc,
_free,
@@ -66,23 +66,16 @@ const createSpellChecker = async ({
const [dic, aff] = await Promise.all([
fetch(new URL(`./${lang}.dic`, dictionariesRootURL)).then(response =>
response.blob()
response.arrayBuffer()
),
fetch(new URL(`./${lang}.aff`, dictionariesRootURL)).then(response =>
response.blob()
response.arrayBuffer()
),
])
FS.mount(
WORKERFS,
{
blobs: [
{ name: 'index.dic', data: dic },
{ name: 'index.aff', data: aff },
],
},
'/dictionaries'
)
FS.mount(MEMFS, {}, '/dictionaries')
FS.writeFile('/dictionaries/index.dic', new Uint8Array(dic))
FS.writeFile('/dictionaries/index.aff', new Uint8Array(aff))
const dicPtr = stringToNewUTF8('/dictionaries/index.dic')
const affPtr = stringToNewUTF8('/dictionaries/index.aff')
@@ -50,14 +50,15 @@ declare class Hunspell {
getValue(ptr: number, type: string): number
FS: {
mkdir(path: string): void
mount(
type: any,
data: { blobs: Record<{ name: string; data: BlobPart }>[] },
dir: string
): void
mount(type: any, opts: Record<string, any>, dir: string): void
/**
 * Writes `data` to the file at `path` in the Emscripten virtual filesystem.
 * The worker uses this to copy the fetched dictionary bytes (as a Uint8Array)
 * into the mounted directory. Declared as returning nothing so the signature
 * does not fall back to an implicit `any` return type.
 */
writeFile(
  path: string,
  data: string | ArrayBufferView,
  opts?: { flags?: string }
): void
}
WORKERFS: any
MEMFS: any
}
declare const factory = async (options?: Record<string, any>) =>
@@ -154,19 +154,6 @@ var wasmMemory;
// set in exit() and abort()
var ABORT = false;
// In STRICT mode, we only define assert() when ASSERTIONS is set. i.e. we
// don't define it at all in release modes. This matches the behaviour of
// MINIMAL_RUNTIME.
// TODO(sbc): Make this the default even without STRICT enabled.
/** @type {function(*, string=)} */ function assert(condition, text) {
  // Nothing to do when the checked condition holds.
  if (condition) return;
  // ASSERTIONS was not defined for this build, so assert() is not expected to
  // be called at all; for any stray caller in the wild, a failed check simply
  // aborts with the supplied message.
  abort(text);
}
// Memory management
var /** @type {!Int8Array} */ HEAP8, /** @type {!Uint8Array} */ HEAPU8, /** @type {!Int16Array} */ HEAP16, /** @type {!Uint16Array} */ HEAPU16, /** @type {!Int32Array} */ HEAP32, /** @type {!Uint32Array} */ HEAPU32, /** @type {!Float32Array} */ HEAPF32, /** @type {!Float64Array} */ HEAPF64;
@@ -1435,152 +1422,6 @@ var FS_getMode = (canRead, canWrite) => {
return mode;
};
var WORKERFS = {
DIR_MODE: 16895,
FILE_MODE: 33279,
reader: null,
mount(mount) {
assert(ENVIRONMENT_IS_WORKER);
if (!WORKERFS.reader) WORKERFS.reader = new FileReaderSync;
var root = WORKERFS.createNode(null, "/", WORKERFS.DIR_MODE, 0);
var createdParents = {};
function ensureParent(path) {
// return the parent node, creating subdirs as necessary
var parts = path.split("/");
var parent = root;
for (var i = 0; i < parts.length - 1; i++) {
var curr = parts.slice(0, i + 1).join("/");
// Issue 4254: Using curr as a node name will prevent the node
// from being found in FS.nameTable when FS.open is called on
// a path which holds a child of this node,
// given that all FS functions assume node names
// are just their corresponding parts within their given path,
// rather than incremental aggregates which include their parent's
// directories.
createdParents[curr] ||= WORKERFS.createNode(parent, parts[i], WORKERFS.DIR_MODE, 0);
parent = createdParents[curr];
}
return parent;
}
function base(path) {
var parts = path.split("/");
return parts[parts.length - 1];
}
// We also accept FileList here, by using Array.prototype
Array.prototype.forEach.call(mount.opts["files"] || [], function(file) {
WORKERFS.createNode(ensureParent(file.name), base(file.name), WORKERFS.FILE_MODE, 0, file, file.lastModifiedDate);
});
(mount.opts["blobs"] || []).forEach(obj => {
WORKERFS.createNode(ensureParent(obj["name"]), base(obj["name"]), WORKERFS.FILE_MODE, 0, obj["data"]);
});
(mount.opts["packages"] || []).forEach(pack => {
pack["metadata"].files.forEach(file => {
var name = file.filename.substr(1);
// remove initial slash
WORKERFS.createNode(ensureParent(name), base(name), WORKERFS.FILE_MODE, 0, pack["blob"].slice(file.start, file.end));
});
});
return root;
},
createNode(parent, name, mode, dev, contents, mtime) {
var node = FS.createNode(parent, name, mode);
node.mode = mode;
node.node_ops = WORKERFS.node_ops;
node.stream_ops = WORKERFS.stream_ops;
node.timestamp = (mtime || new Date).getTime();
assert(WORKERFS.FILE_MODE !== WORKERFS.DIR_MODE);
if (mode === WORKERFS.FILE_MODE) {
node.size = contents.size;
node.contents = contents;
} else {
node.size = 4096;
node.contents = {};
}
if (parent) {
parent.contents[name] = node;
}
return node;
},
node_ops: {
getattr(node) {
return {
dev: 1,
ino: node.id,
mode: node.mode,
nlink: 1,
uid: 0,
gid: 0,
rdev: 0,
size: node.size,
atime: new Date(node.timestamp),
mtime: new Date(node.timestamp),
ctime: new Date(node.timestamp),
blksize: 4096,
blocks: Math.ceil(node.size / 4096)
};
},
setattr(node, attr) {
if (attr.mode !== undefined) {
node.mode = attr.mode;
}
if (attr.timestamp !== undefined) {
node.timestamp = attr.timestamp;
}
},
lookup(parent, name) {
throw new FS.ErrnoError(44);
},
mknod(parent, name, mode, dev) {
throw new FS.ErrnoError(63);
},
rename(oldNode, newDir, newName) {
throw new FS.ErrnoError(63);
},
unlink(parent, name) {
throw new FS.ErrnoError(63);
},
rmdir(parent, name) {
throw new FS.ErrnoError(63);
},
readdir(node) {
var entries = [ ".", ".." ];
for (var key of Object.keys(node.contents)) {
entries.push(key);
}
return entries;
},
symlink(parent, newName, oldPath) {
throw new FS.ErrnoError(63);
}
},
stream_ops: {
read(stream, buffer, offset, length, position) {
if (position >= stream.node.size) return 0;
var chunk = stream.node.contents.slice(position, position + length);
var ab = WORKERFS.reader.readAsArrayBuffer(chunk);
buffer.set(new Uint8Array(ab), offset);
return chunk.size;
},
write(stream, buffer, offset, length, position) {
throw new FS.ErrnoError(29);
},
llseek(stream, offset, whence) {
var position = offset;
if (whence === 1) {
position += stream.position;
} else if (whence === 2) {
if (FS.isFile(stream.node.mode)) {
position += stream.node.size;
}
}
if (position < 0) {
throw new FS.ErrnoError(28);
}
return position;
}
}
};
var FS = {
root: null,
mounts: [],
@@ -2796,8 +2637,7 @@ var FS = {
FS.createDefaultDevices();
FS.createSpecialDirectories();
FS.filesystems = {
"MEMFS": MEMFS,
"WORKERFS": WORKERFS
"MEMFS": MEMFS
};
},
init(input, output, error) {
@@ -3872,7 +3712,7 @@ Module["UTF8ToString"] = UTF8ToString;
Module["stringToNewUTF8"] = stringToNewUTF8;
Module["WORKERFS"] = WORKERFS;
Module["MEMFS"] = MEMFS;
var calledRun;