From ab55d54c4593c973fcc61160a7677eabe5aeab6c Mon Sep 17 00:00:00 2001 From: Alf Eaton Date: Thu, 6 Mar 2025 09:56:58 +0000 Subject: [PATCH] Add custom words to the `en_US` dictionary (#24108) GitOrigin-RevId: 44587567340b4a6ff357f6a321055554a800d799 --- .../source-editor/hunspell/HunspellManager.ts | 2 +- .../source-editor/hunspell/hunspell.worker.ts | 14 +- .../source-editor/hunspell/wasm/hunspell.d.ts | 6 + .../wordlists/dictionary-additions.ts | 18 + .../hunspell/wordlists/en_US.txt | 535 ++++++++++++++++++ services/web/types/assets.d.ts | 5 + services/web/webpack.config.js | 7 + 7 files changed, 581 insertions(+), 6 deletions(-) create mode 100644 services/web/frontend/js/features/source-editor/hunspell/wordlists/dictionary-additions.ts create mode 100644 services/web/frontend/js/features/source-editor/hunspell/wordlists/en_US.txt diff --git a/services/web/frontend/js/features/source-editor/hunspell/HunspellManager.ts b/services/web/frontend/js/features/source-editor/hunspell/HunspellManager.ts index 6513a07a49..794ede79a1 100644 --- a/services/web/frontend/js/features/source-editor/hunspell/HunspellManager.ts +++ b/services/web/frontend/js/features/source-editor/hunspell/HunspellManager.ts @@ -149,7 +149,7 @@ export class HunspellManager { this.hunspellWorker.postMessage({ type: 'init', lang: this.language, - learnedWords: this.learnedWords, // TODO: add words + learnedWords: this.learnedWords, baseAssetPath: this.baseAssetPath, dictionariesRoot: this.dictionariesRoot, }) diff --git a/services/web/frontend/js/features/source-editor/hunspell/hunspell.worker.ts b/services/web/frontend/js/features/source-editor/hunspell/hunspell.worker.ts index 5af156d38f..c1a8577f87 100644 --- a/services/web/frontend/js/features/source-editor/hunspell/hunspell.worker.ts +++ b/services/web/frontend/js/features/source-editor/hunspell/hunspell.worker.ts @@ -1,5 +1,6 @@ import Hunspell from './wasm/hunspell' import hunspellWasmPath from './wasm/hunspell.wasm' +import { buildAdditionalDictionary } from './wordlists/dictionary-additions' type SpellChecker = { spell(words: string[]): { index: number }[] @@ -52,6 +53,7 @@ const createSpellChecker = async ({ 'number', 'number', ]) + const addDic = cwrap('Hunspell_add_dic', 'number', ['number', 'number']) const addWord = cwrap('Hunspell_add', 'number', ['number', 'number']) const removeWord = cwrap('Hunspell_remove', 'number', ['number', 'number']) const freeList = cwrap('Hunspell_free_list', 'number', [ @@ -81,11 +83,13 @@ const createSpellChecker = async ({ const affPtr = stringToNewUTF8('/dictionaries/index.aff') const spellPtr = create(affPtr, dicPtr) - for (const word of learnedWords) { - const wordPtr = stringToNewUTF8(word) - addWord(spellPtr, wordPtr) - _free(wordPtr) - } + FS.writeFile( + '/dictionaries/extra.dic', + await buildAdditionalDictionary(lang, learnedWords) + ) + const extraDicPtr = stringToNewUTF8('/dictionaries/extra.dic') + addDic(spellPtr, extraDicPtr) + _free(extraDicPtr) const spellChecker: SpellChecker = { spell(words) { diff --git a/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.d.ts b/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.d.ts index 2f3973ac81..ee04ced09f 100644 --- a/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.d.ts +++ b/services/web/frontend/js/features/source-editor/hunspell/wasm/hunspell.d.ts @@ -25,6 +25,12 @@ declare class Hunspell { input: string[] ): (spellPtr: number, suggestionListPtr: number, wordPtr: number) => number + cwrap( + method: 'Hunspell_add_dic', + output: string, + input: string[] + ): (spellPtr: number, wordPtr: number) => number + cwrap( method: 'Hunspell_add', output: string, diff --git a/services/web/frontend/js/features/source-editor/hunspell/wordlists/dictionary-additions.ts b/services/web/frontend/js/features/source-editor/hunspell/wordlists/dictionary-additions.ts new file mode 100644 index 0000000000..585e364f73 --- /dev/null +++ b/services/web/frontend/js/features/source-editor/hunspell/wordlists/dictionary-additions.ts @@ -0,0 +1,18 @@ +const dictionaryAdditions = new Set(['en_US']) + +export const buildAdditionalDictionary = async ( + lang: string, + learnedWords: string[] +) => { + const words = [...learnedWords] + + if (dictionaryAdditions.has(lang)) { + const wordList = await import(`./${lang}.txt`).then(m => m.default) + words.push(...wordList.split('\n').filter(Boolean)) + } + + // the first line contains the approximate word count + words.unshift(String(words.length)) + + return new TextEncoder().encode(words.join('\n')) +} diff --git a/services/web/frontend/js/features/source-editor/hunspell/wordlists/en_US.txt b/services/web/frontend/js/features/source-editor/hunspell/wordlists/en_US.txt new file mode 100644 index 0000000000..c90ef8a42b --- /dev/null +++ b/services/web/frontend/js/features/source-editor/hunspell/wordlists/en_US.txt @@ -0,0 +1,535 @@ +Abaqus +abelian +ACF +ACS +AdaBoost +AFM +AGN +AIC +AlexNet +AMR +anharmonicity +ANNs +ANOVA +ANSYS +apriori +arcsec +ARIMA +ARMA +arXiv +ASIC +ASR +asymptotics +AUC +autoencoder +autoencoders +AWGN +axion +backend +backhaul +backpropagation +Balmer +beamformer +beamforming +beamline +BER +BFGS +BiLSTM +binarization +binarized +Biot +bistability +BLEU +BMI +Bogoliubov +Bonferroni +Boussinesq +Cartan +cartesian +casted +CDF +CDM +centerline +CFD +CFL +Chebyshev +Cholesky +CIFAR +CMB +CNC +CNF +Colab +collisionless +comorbidities +compactification +COMSOL +confounders +CoV +CRF +crosslinking +CSI +CubeSat +CUDA +cumulant +cumulants +CVD +DAC +DAQ +datacenter +datasheet +DBSCAN +DCT +debiasing +Debye +deformability +delocalization +denoise +denoised +denoising +DenseNet +densification +dephasing +depthwise +detrended +deviatoric +DevOps +DFS +DFT +dichalcogenides +dimerization +discretizations +discriminability +dispatchable +distinguishability +distributionally +DNN +DNNs +DOF +DOFs +DQN +DRL +Drude +DSC +DTU +dynamicity +ECM +eco +EDA +EfficientNet +eigen +eigenmode +eigenmodes +EKF +electrolyzer +EMG +endogeneity +ensembling +equivariant +ERP +ESS +Eulerian +exceedance +explainability +Fabry +FCN +FDM +FEA +feedforward +FFT +finetune +finetuned +finetuning +Floquet +fluorophore +fluorophores +Fock +forcings +FOV +FPGA +FPGAs +freestream +Frobenius +FSM +FTIR +functionalization +functionalized +functionals +FWHM +GaAs +Galerkin +GANs +gapless +GARCH +gaussian +Gaussianity +GCN +GDPR +GHG +Gini +GIS +GLM +GMM +GNN +GNNs +GNSS +GPP +GPT +grayscale +GRU +Gurobi +hadronization +Hankel +Hartree +HCI +HCl +heatmap +heatmaps +hemodynamic +heteroskedasticity +HMD +holonomic +homodyne +homophily +Hopfield +HPC +HVAC +hypergraph +hypergraphs +hyperparameter +hyperspectral +hypersurface +hypersurfaces +hysteretic +ICP +ICT +IDF +iff +IID +ILP +ImageJ +ImageNet +IMU +incentivizes +incentivizing +injectivity +inpainting +interferogram +interparticle +intertemporal +intraday +invasively +IoU +IQR +Ising +iso +Jaccard +Jupyter +JWST +Kaggle +Kalman +Keras +ket +keypoint +keypoints +Kinect +KNN +kpc +KPI +KPIs +Krylov +Kubernetes +Kutta +LabVIEW +Langevin +Larmor +LCA +LDA +Lennard +Levenberg +LHC +LHS +LIGO +Likert +Lindblad +linewidth +LLM +LLMs +logit +logits +lognormal +LoRa +LQR +LSTM +LSTMs +LTE +LTI +macroscale +Majorana +makespan +MAPE +MapReduce +Marquardt +MCMC +MCU +MDP +measurability +medoids +mesoscale +metaheuristic +metaheuristics +metamodel +meV +microenvironment +microfluidic +microfluidics +microgrid +microgrids +microscale +microservice +microservices +MILP +MIMO +MIP +misclassifications +missingness +MLE +MLP +MLPs +mmWave +MNIST +MobileNet +monodisperse +MOSFET +MOSFETs +MPC +MPI +MQTT +MSE +multiclass +multiobjective +multiphysics +multipoles +multiscale +mV +NaOH +NER +neuro +neuromorphic +NIR +NIST +NLP +NN +NNs +Noether +nonconvex +nondegenerate +nonlinearities +nonlinearly +nonlocal +nonstationary +Nusselt +ODEs +odometry +OFDM +OLS +omics +OpenCV +OpenFOAM +OpenMP +OpenStreetMap +operationalization +overdamped +overfit +overfitting +overpotential +PageRank +parameterization +parameterizations +parameterizing +paraxial +passivation +PCA +PCC +PCR +PDEs +PDMS +Peltier +perceptron +perceptrons +perturbatively +photovoltaics +piezo +PIV +PLA +planform +PMMA +PMT +polarimetric +polarizers +polytropic +posedness +poset +postprocessing +PPO +Prandtl +preconditioner +premia +prespecified +pretrained +pretraining +prosumers +proto +PSD +PSNR +PSO +PV +PWM +QFT +quadcopter +quadrotor +quintiles +Raman +RANSAC +RBF +RDF +recyclability +regularizer +ReLU +reparameterization +ResNet +reweighted +reweighting +RHS +Ricci +Riesz +RL +RLC +RMSE +RNN +RNNs +RoBERTa +ROS +RPC +RSSI +RTT +Runge +SaaS +SAR +scalings +scatterplot +Schwarz +scikit +SCM +Scopus +SDE +SDG +SDN +SDP +SDR +SDSS +SED +serverless +setpoint +setpoints +SFR +sharding +sigmoidal +Simulink +SiO +SLA +SMA +SMC +Smirnov +SMT +SNE +Sobel +Sobolev +sociodemographic +softmax +SolidWorks +soliton +solitons +SOTA +spacelike +spanwise +sparsification +SPI +spintronic +spintronics +SSIM +Stackelberg +Stata +STFT +STL +STM +stochasticity +streamwise +Strouhal +subcarrier +subcarriers +subgraph +subgraphs +subgrid +sublinear +submodular +subproblems +superlinear +supremum +SVD +SVM +SVMs +TEM +TeV +thermalization +thermo +thermomechanical +thermophysical +thresholding +Tikhonov +timelike +timeseries +timestep +Toeplitz +TOF +TP +tradeoff +tradeoffs +transcriptomic +translationally +tri +Uber +UMAP +UML +undeformed +underactuated +underexplored +underfitting +undoped +UNet +unmodeled +unpolarized +VAE +VAEs +VGG +virial +viscoelasticity +ViT +Voigt +Voronoi +voxel +voxels +Waals +walkthrough +Wasserstein +wavefunction +wavefunctions +wavenumber +wavenumbers +wettability +Wilcoxon +XAI +XGBoost +Xilinx +XRD +Yukawa +Zehnder diff --git a/services/web/types/assets.d.ts b/services/web/types/assets.d.ts index 1aa1a145b6..458256dab7 100644 --- a/services/web/types/assets.d.ts +++ b/services/web/types/assets.d.ts @@ -22,3 +22,8 @@ declare module '*.wasm' { const src: string export default src } + +declare module '*.txt' { + const src: string + export default src +} diff --git a/services/web/webpack.config.js b/services/web/webpack.config.js index 3917985461..72fa660589 100644 --- a/services/web/webpack.config.js +++ b/services/web/webpack.config.js @@ -158,6 +158,13 @@ module.exports = { filename: 'js/[name]-[contenthash][ext]', }, }, + { + test: /\.txt$/, + type: 'asset/source', + generator: { + filename: 'js/[name]-[contenthash][ext]', + }, + }, { // Pass Less files through less-loader/css-loader/mini-css-extract- // plugin (note: run in reverse order)