Use MEMFS in Hunspell worker (#22998)

GitOrigin-RevId: ef60b2c333484499e6289c51eacab04b983e1090
2026-06-04 14:49:01 +02:00 · 2025-01-22 09:37:41 +00:00
parent f36963a12e
commit 8d39ed16ef
4 changed files with 16 additions and 183 deletions
@@ -12,11 +12,10 @@ emmake make

 em++ \
    -s EXPORTED_FUNCTIONS="['_Hunspell_create', '_Hunspell_destroy', '_Hunspell_spell', '_Hunspell_suggest', '_Hunspell_free_list', '_Hunspell_add_dic', '_Hunspell_add', '_Hunspell_remove', '_free', '_malloc', 'FS']" \
-    -s EXPORTED_RUNTIME_METHODS="['ccall', 'cwrap', 'getValue', 'stringToNewUTF8', 'UTF8ToString', 'WORKERFS']" \
+    -s EXPORTED_RUNTIME_METHODS="['ccall', 'cwrap', 'getValue', 'stringToNewUTF8', 'UTF8ToString', 'MEMFS']" \
    -s ENVIRONMENT=worker \
    -s STACK_SIZE=5MB \
    -s ALLOW_MEMORY_GROWTH \
-    -lworkerfs.js \
    -O2 \
    -g2 \
    src/hunspell/.libs/libhunspell-1.7.a \
@@ -33,7 +33,7 @@ const createSpellChecker = async ({
  const {
    cwrap,
    FS,
-    WORKERFS,
+    MEMFS,
    stringToNewUTF8,
    _malloc,
    _free,
@@ -66,23 +66,16 @@ const createSpellChecker = async ({

  const [dic, aff] = await Promise.all([
    fetch(new URL(`./${lang}.dic`, dictionariesRootURL)).then(response =>
-      response.blob()
+      response.arrayBuffer()
    ),
    fetch(new URL(`./${lang}.aff`, dictionariesRootURL)).then(response =>
-      response.blob()
+      response.arrayBuffer()
    ),
  ])

-  FS.mount(
-    WORKERFS,
-    {
-      blobs: [
-        { name: 'index.dic', data: dic },
-        { name: 'index.aff', data: aff },
-      ],
-    },
-    '/dictionaries'
-  )
+  FS.mount(MEMFS, {}, '/dictionaries')
+  FS.writeFile('/dictionaries/index.dic', new Uint8Array(dic))
+  FS.writeFile('/dictionaries/index.aff', new Uint8Array(aff))

  const dicPtr = stringToNewUTF8('/dictionaries/index.dic')
  const affPtr = stringToNewUTF8('/dictionaries/index.aff')
@@ -50,14 +50,15 @@ declare class Hunspell {
  getValue(ptr: number, type: string): number
  FS: {
    mkdir(path: string): void
-    mount(
-      type: any,
-      data: { blobs: Record<{ name: string; data: BlobPart }>[] },
-      dir: string
-    ): void
+    mount(type: any, opts: Record<string, any>, dir: string): void
+    writeFile(
+      path: string,
+      data: string | ArrayBufferView,
+      opts?: { flags?: string }
+    ): void
  }

-  WORKERFS: any
+  MEMFS: any
 }

 declare const factory = async (options?: Record<string, any>) =>
@@ -154,19 +154,6 @@ var wasmMemory;
 // set in exit() and abort()
 var ABORT = false;

-// In STRICT mode, we only define assert() when ASSERTIONS is set.  i.e. we
-// don't define it at all in release modes.  This matches the behaviour of
-// MINIMAL_RUNTIME.
-// TODO(sbc): Make this the default even without STRICT enabled.
-/** @type {function(*, string=)} */ function assert(condition, text) {
-  if (!condition) {
-    // This build was created without ASSERTIONS defined.  `assert()` should not
-    // ever be called in this configuration but in case there are callers in
-    // the wild leave this simple abort() implementation here for now.
-    abort(text);
-  }
-}
-
 // Memory management
 var /** @type {!Int8Array} */ HEAP8, /** @type {!Uint8Array} */ HEAPU8, /** @type {!Int16Array} */ HEAP16, /** @type {!Uint16Array} */ HEAPU16, /** @type {!Int32Array} */ HEAP32, /** @type {!Uint32Array} */ HEAPU32, /** @type {!Float32Array} */ HEAPF32, /** @type {!Float64Array} */ HEAPF64;

@@ -1435,152 +1422,6 @@ var FS_getMode = (canRead, canWrite) => {
  return mode;
 };

-var WORKERFS = {
-  DIR_MODE: 16895,
-  FILE_MODE: 33279,
-  reader: null,
-  mount(mount) {
-    assert(ENVIRONMENT_IS_WORKER);
-    if (!WORKERFS.reader) WORKERFS.reader = new FileReaderSync;
-    var root = WORKERFS.createNode(null, "/", WORKERFS.DIR_MODE, 0);
-    var createdParents = {};
-    function ensureParent(path) {
-      // return the parent node, creating subdirs as necessary
-      var parts = path.split("/");
-      var parent = root;
-      for (var i = 0; i < parts.length - 1; i++) {
-        var curr = parts.slice(0, i + 1).join("/");
-        // Issue 4254: Using curr as a node name will prevent the node
-        // from being found in FS.nameTable when FS.open is called on
-        // a path which holds a child of this node,
-        // given that all FS functions assume node names
-        // are just their corresponding parts within their given path,
-        // rather than incremental aggregates which include their parent's
-        // directories.
-        createdParents[curr] ||= WORKERFS.createNode(parent, parts[i], WORKERFS.DIR_MODE, 0);
-        parent = createdParents[curr];
-      }
-      return parent;
-    }
-    function base(path) {
-      var parts = path.split("/");
-      return parts[parts.length - 1];
-    }
-    // We also accept FileList here, by using Array.prototype
-    Array.prototype.forEach.call(mount.opts["files"] || [], function(file) {
-      WORKERFS.createNode(ensureParent(file.name), base(file.name), WORKERFS.FILE_MODE, 0, file, file.lastModifiedDate);
-    });
-    (mount.opts["blobs"] || []).forEach(obj => {
-      WORKERFS.createNode(ensureParent(obj["name"]), base(obj["name"]), WORKERFS.FILE_MODE, 0, obj["data"]);
-    });
-    (mount.opts["packages"] || []).forEach(pack => {
-      pack["metadata"].files.forEach(file => {
-        var name = file.filename.substr(1);
-        // remove initial slash
-        WORKERFS.createNode(ensureParent(name), base(name), WORKERFS.FILE_MODE, 0, pack["blob"].slice(file.start, file.end));
-      });
-    });
-    return root;
-  },
-  createNode(parent, name, mode, dev, contents, mtime) {
-    var node = FS.createNode(parent, name, mode);
-    node.mode = mode;
-    node.node_ops = WORKERFS.node_ops;
-    node.stream_ops = WORKERFS.stream_ops;
-    node.timestamp = (mtime || new Date).getTime();
-    assert(WORKERFS.FILE_MODE !== WORKERFS.DIR_MODE);
-    if (mode === WORKERFS.FILE_MODE) {
-      node.size = contents.size;
-      node.contents = contents;
-    } else {
-      node.size = 4096;
-      node.contents = {};
-    }
-    if (parent) {
-      parent.contents[name] = node;
-    }
-    return node;
-  },
-  node_ops: {
-    getattr(node) {
-      return {
-        dev: 1,
-        ino: node.id,
-        mode: node.mode,
-        nlink: 1,
-        uid: 0,
-        gid: 0,
-        rdev: 0,
-        size: node.size,
-        atime: new Date(node.timestamp),
-        mtime: new Date(node.timestamp),
-        ctime: new Date(node.timestamp),
-        blksize: 4096,
-        blocks: Math.ceil(node.size / 4096)
-      };
-    },
-    setattr(node, attr) {
-      if (attr.mode !== undefined) {
-        node.mode = attr.mode;
-      }
-      if (attr.timestamp !== undefined) {
-        node.timestamp = attr.timestamp;
-      }
-    },
-    lookup(parent, name) {
-      throw new FS.ErrnoError(44);
-    },
-    mknod(parent, name, mode, dev) {
-      throw new FS.ErrnoError(63);
-    },
-    rename(oldNode, newDir, newName) {
-      throw new FS.ErrnoError(63);
-    },
-    unlink(parent, name) {
-      throw new FS.ErrnoError(63);
-    },
-    rmdir(parent, name) {
-      throw new FS.ErrnoError(63);
-    },
-    readdir(node) {
-      var entries = [ ".", ".." ];
-      for (var key of Object.keys(node.contents)) {
-        entries.push(key);
-      }
-      return entries;
-    },
-    symlink(parent, newName, oldPath) {
-      throw new FS.ErrnoError(63);
-    }
-  },
-  stream_ops: {
-    read(stream, buffer, offset, length, position) {
-      if (position >= stream.node.size) return 0;
-      var chunk = stream.node.contents.slice(position, position + length);
-      var ab = WORKERFS.reader.readAsArrayBuffer(chunk);
-      buffer.set(new Uint8Array(ab), offset);
-      return chunk.size;
-    },
-    write(stream, buffer, offset, length, position) {
-      throw new FS.ErrnoError(29);
-    },
-    llseek(stream, offset, whence) {
-      var position = offset;
-      if (whence === 1) {
-        position += stream.position;
-      } else if (whence === 2) {
-        if (FS.isFile(stream.node.mode)) {
-          position += stream.node.size;
-        }
-      }
-      if (position < 0) {
-        throw new FS.ErrnoError(28);
-      }
-      return position;
-    }
-  }
-};
-
 var FS = {
  root: null,
  mounts: [],
@@ -2796,8 +2637,7 @@ var FS = {
    FS.createDefaultDevices();
    FS.createSpecialDirectories();
    FS.filesystems = {
-      "MEMFS": MEMFS,
-      "WORKERFS": WORKERFS
+      "MEMFS": MEMFS
    };
  },
  init(input, output, error) {
@@ -3872,7 +3712,7 @@ Module["UTF8ToString"] = UTF8ToString;

 Module["stringToNewUTF8"] = stringToNewUTF8;

-Module["WORKERFS"] = WORKERFS;
+Module["MEMFS"] = MEMFS;

 var calledRun;