diff --git a/services/web/Dockerfile b/services/web/Dockerfile index cfd4469e00..9046917eb3 100644 --- a/services/web/Dockerfile +++ b/services/web/Dockerfile @@ -27,6 +27,11 @@ RUN update-ca-certificates # the deps image is used for caching yarn workspaces focus FROM base AS deps-prod +# Pyodide wheel bundle (~370 MB). Version + SHA-256 are pinned in the fetch +# script; keep that in sync with the pyodide dep in services/web/package.json. +COPY services/web/scripts/fetch-pyodide-packages.mjs /overleaf/services/web/scripts/fetch-pyodide-packages.mjs +RUN cd /overleaf/services/web && node scripts/fetch-pyodide-packages.mjs + COPY package.json yarn.lock .yarnrc.yml /overleaf/ COPY libraries/access-token-encryptor/package.json /overleaf/libraries/access-token-encryptor/package.json COPY libraries/eslint-plugin/package.json /overleaf/libraries/eslint-plugin/package.json diff --git a/services/web/Makefile b/services/web/Makefile index 4ec73389fa..bae608ede7 100644 --- a/services/web/Makefile +++ b/services/web/Makefile @@ -649,6 +649,7 @@ IMAGE_CACHE ?= $(IMAGE_REPO):cache-$(shell cat \ $(MONOREPO)/libraries/stream-utils/package.json \ $(MONOREPO)/libraries/validation-tools/package.json \ $(MONOREPO)/services/web/package.json \ + $(MONOREPO)/services/web/scripts/fetch-pyodide-packages.mjs \ $(MONOREPO)/patches/* \ | sha256sum | cut -d '-' -f1) diff --git a/services/web/cypress/fixtures/pyodide-packages/tomli-2.2.1-py3-none-any.whl b/services/web/cypress/fixtures/pyodide-packages/tomli-2.2.1-py3-none-any.whl deleted file mode 100644 index d42f9419ef..0000000000 Binary files a/services/web/cypress/fixtures/pyodide-packages/tomli-2.2.1-py3-none-any.whl and /dev/null differ diff --git a/services/web/cypress/support/webpack.cypress.ts b/services/web/cypress/support/webpack.cypress.ts index 84a244c5fc..8f78d25676 100644 --- a/services/web/cypress/support/webpack.cypress.ts +++ b/services/web/cypress/support/webpack.cypress.ts @@ -16,8 +16,8 @@ const buildConfig = () => { 
watch: false, }, { - directory: path.join(__dirname, '../fixtures/pyodide-packages'), - publicPath: '/pyodide-packages/', + directory: path.join(__dirname, '../../public/js/libs/pyodide'), + publicPath: '/__cypress/src/js/libs/pyodide/', watch: false, }, ], diff --git a/services/web/frontend/js/features/ide-react/components/editor/python/pyodide-worker-client.ts b/services/web/frontend/js/features/ide-react/components/editor/python/pyodide-worker-client.ts index c59720a7c1..c79caeaf8d 100644 --- a/services/web/frontend/js/features/ide-react/components/editor/python/pyodide-worker-client.ts +++ b/services/web/frontend/js/features/ide-react/components/editor/python/pyodide-worker-client.ts @@ -30,7 +30,6 @@ export type LifecycleCallback = ( export class PyodideWorkerClient { private worker: Worker private baseAssetPath: string - private packageBaseUrl: string | undefined private createWorker: () => Worker private listening = false private destroyed = false @@ -41,13 +40,11 @@ export class PyodideWorkerClient { constructor(options: { baseAssetPath: string - packageBaseUrl?: string createWorker: () => Worker onOutput?: OutputCallback onLifecycle?: LifecycleCallback }) { this.baseAssetPath = options.baseAssetPath - this.packageBaseUrl = options.packageBaseUrl this.createWorker = options.createWorker this.outputCallback = options.onOutput ?? null this.lifecycleCallback = options.onLifecycle ?? 
null @@ -57,7 +54,6 @@ export class PyodideWorkerClient { this.queueMessage({ type: 'init', baseAssetPath: this.baseAssetPath, - packageBaseUrl: this.packageBaseUrl, }) } @@ -101,7 +97,6 @@ export class PyodideWorkerClient { this.queueMessage({ type: 'init', baseAssetPath: this.baseAssetPath, - packageBaseUrl: this.packageBaseUrl, }) } diff --git a/services/web/frontend/js/features/ide-react/components/editor/python/pyodide-worker-messages.ts b/services/web/frontend/js/features/ide-react/components/editor/python/pyodide-worker-messages.ts index 78b621bc2f..8b5f08ad0b 100644 --- a/services/web/frontend/js/features/ide-react/components/editor/python/pyodide-worker-messages.ts +++ b/services/web/frontend/js/features/ide-react/components/editor/python/pyodide-worker-messages.ts @@ -15,7 +15,6 @@ export type OutputFileData = { export type InitRequest = { type: 'init' baseAssetPath: string - packageBaseUrl?: string } export type RunCodeRequest = { diff --git a/services/web/frontend/js/features/ide-react/components/editor/python/pyodide.worker.ts b/services/web/frontend/js/features/ide-react/components/editor/python/pyodide.worker.ts index fc4f3c020f..7bf4bbfbae 100644 --- a/services/web/frontend/js/features/ide-react/components/editor/python/pyodide.worker.ts +++ b/services/web/frontend/js/features/ide-react/components/editor/python/pyodide.worker.ts @@ -15,7 +15,6 @@ type PyodideModule = typeof import('pyodide') const PROJECT_FS_ROOT = '/project' const PROJECT_FS_PREFIX = `${PROJECT_FS_ROOT}/` const PYODIDE_INDEX_PATH = 'js/libs/pyodide/' -const PYODIDE_CDN_URL = 'https://cdn.jsdelivr.net/pyodide/v' function ensureDirectoryExists(fs: PyodideFS, filePath: string) { const directory = path.dirname(filePath) @@ -51,7 +50,7 @@ function syncProjectFiles(fs: PyodideFS, files: ProjectFileData[]) { } let pyodideModule: PyodideModule | null = null -let packageBaseUrlOverride: string | undefined +let pyodideIndexUrl: string | undefined async function 
loadPyodideModule(pyodideIndexUrl: string) { const runtimeModuleUrl = `${pyodideIndexUrl}pyodide.mjs` @@ -70,12 +69,7 @@ async function loadPyodideModule(pyodideIndexUrl: string) { } async function handleInit(msg: InitRequest) { - const pyodideIndexUrl = new URL( - PYODIDE_INDEX_PATH, - msg.baseAssetPath - ).toString() - - packageBaseUrlOverride = msg.packageBaseUrl + pyodideIndexUrl = new URL(PYODIDE_INDEX_PATH, msg.baseAssetPath).toString() try { pyodideModule = await loadPyodideModule(pyodideIndexUrl) @@ -93,7 +87,7 @@ async function handleInit(msg: InitRequest) { async function handleRunCode(msg: RunCodeRequest) { const { fileId, executionId } = msg - if (!pyodideModule) { + if (!pyodideModule || !pyodideIndexUrl) { self.postMessage({ type: 'output-line', stream: 'stderr', @@ -114,9 +108,7 @@ async function handleRunCode(msg: RunCodeRequest) { const instance = await pyodideModule.loadPyodide({ env: { MPLBACKEND: 'Agg' }, - packageBaseUrl: - packageBaseUrlOverride ?? - `${PYODIDE_CDN_URL}${pyodideModule.version}/full/`, + packageBaseUrl: `${pyodideIndexUrl}${pyodideModule.version}/`, }) const writtenPaths = new Set() diff --git a/services/web/frontend/js/features/ide-react/components/editor/python/python-runner.ts b/services/web/frontend/js/features/ide-react/components/editor/python/python-runner.ts index 693a284afa..4aa9a12025 100644 --- a/services/web/frontend/js/features/ide-react/components/editor/python/python-runner.ts +++ b/services/web/frontend/js/features/ide-react/components/editor/python/python-runner.ts @@ -43,7 +43,6 @@ export class PythonRunner { readonly fileId: string private client: PyodideWorkerClient | null = null private readonly baseAssetPath: string - private readonly packageBaseUrl: string | undefined private readonly createWorker: () => Worker private readonly getExecutionContext: () => Promise private listeners = new Set() @@ -55,12 +54,10 @@ export class PythonRunner { fileId: string, baseAssetPath: string, getExecutionContext: () => 
Promise, - createWorker: () => Worker, - packageBaseUrl?: string + createWorker: () => Worker ) { this.fileId = fileId this.baseAssetPath = baseAssetPath - this.packageBaseUrl = packageBaseUrl this.createWorker = createWorker this.getExecutionContext = getExecutionContext } @@ -102,7 +99,6 @@ export class PythonRunner { this.client = new PyodideWorkerClient({ baseAssetPath: this.baseAssetPath, - packageBaseUrl: this.packageBaseUrl, createWorker: this.createWorker, onLifecycle: event => { switch (event.type) { diff --git a/services/web/frontend/js/features/ide-react/context/python-execution-context.tsx b/services/web/frontend/js/features/ide-react/context/python-execution-context.tsx index fc74829cd2..417bbd79cd 100644 --- a/services/web/frontend/js/features/ide-react/context/python-execution-context.tsx +++ b/services/web/frontend/js/features/ide-react/context/python-execution-context.tsx @@ -37,9 +37,9 @@ export const PythonExecutionContext = createContext< PythonExecutionContextValue | undefined >(undefined) -export const PythonExecutionProvider: FC< - PropsWithChildren<{ packageBaseUrl?: string }> -> = ({ children, packageBaseUrl }) => { +export const PythonExecutionProvider: FC = ({ + children, +}) => { const { openDocs } = useEditorManagerContext() const { projectSnapshot } = useProjectContext() const { pathInFolder } = useFileTreePathContext() @@ -99,14 +99,13 @@ export const PythonExecutionProvider: FC< fileId, baseAssetPathRef.current, () => getExecutionContext(fileId), - createPyodideWorker, - packageBaseUrl + createPyodideWorker ) runner.init() runnersRef.current.set(fileId, runner) return runner }, - [getExecutionContext, packageBaseUrl] + [getExecutionContext] ) useEffect(() => { diff --git a/services/web/package.json b/services/web/package.json index 5e8d3da33c..1cb392da50 100644 --- a/services/web/package.json +++ b/services/web/package.json @@ -26,6 +26,7 @@ "nodemon": "node --watch app.mjs --watch-locales", "webpack": "webpack serve --config 
webpack.config.dev.js", "webpack:production": "webpack --config webpack.config.prod.js", + "pyodide:fetch": "node scripts/fetch-pyodide-packages.mjs", "webpack:profile": "webpack --config webpack.config.prod.js --profile --json > stats.json", "lint": "eslint --cache --cache-location ../../node_modules/.cache/eslint/ --max-warnings 0 --format unix --ext .js,.jsx,.mjs,.ts,.tsx .", "lint:fix": "eslint --cache --cache-location ../../node_modules/.cache/eslint/ --fix --ext .js,.jsx,.mjs,.ts,.tsx .", @@ -374,7 +375,7 @@ "postcss": "^8.4.31", "postcss-loader": "^7.3.3", "prop-types": "^15.7.2", - "pyodide": "^0.29.0", + "pyodide": "0.29.3", "qrcode": "^1.4.4", "react": "^18.3.1", "react-bootstrap": "^2.10.10", diff --git a/services/web/scripts/fetch-pyodide-packages.mjs b/services/web/scripts/fetch-pyodide-packages.mjs new file mode 100644 index 0000000000..dc10bc856f --- /dev/null +++ b/services/web/scripts/fetch-pyodide-packages.mjs @@ -0,0 +1,136 @@ +/* eslint-disable @overleaf/require-script-runner */ +// This script doesn't work with ScriptRunner because it is run during the build process. +import { createReadStream, createWriteStream } from 'node:fs' +import { mkdir, readdir, rm, stat, writeFile } from 'node:fs/promises' +import { Readable } from 'node:stream' +import { pipeline } from 'node:stream/promises' +import { execFile } from 'node:child_process' +import { createHash } from 'node:crypto' +import { promisify } from 'node:util' +import path from 'node:path' +import { fileURLToPath } from 'node:url' + +const execFileAsync = promisify(execFile) + +const SERVICE_WEB_DIR = path.resolve(fileURLToPath(import.meta.url), '../..') + +// Pinned pyodide release tarball. Keep PYODIDE_VERSION in sync with the +// "pyodide" entry in services/web/package.json. 
When bumping, update both +// PYODIDE_VERSION and EXPECTED_SHA256 together; fetch the hash via: +// curl -sL https://api.github.com/repos/pyodide/pyodide/releases/tags/<version> \ +// | jq -r '.assets[] | select(.name=="pyodide-<version>.tar.bz2") | .digest' +// (strip the "sha256:" prefix). Cross-check by downloading the tarball and +// running `shasum -a 256 pyodide-<version>.tar.bz2`. +const PYODIDE_VERSION = '0.29.3' +const EXPECTED_SHA256 = + '458e8ddbcbb6e21037d3237cd5c5146c451765bc738dfa2249ff34c5140331e4' +const TARGET_DIR = path.join( + SERVICE_WEB_DIR, + 'public/js/libs/pyodide', + PYODIDE_VERSION +) +const TARBALL_NAME = `pyodide-${PYODIDE_VERSION}.tar.bz2` +const RELEASE_URL = `https://github.com/pyodide/pyodide/releases/download/${PYODIDE_VERSION}/${TARBALL_NAME}` +const COMPLETE_MARKER = path.join(TARGET_DIR, '.fetch-complete') + +async function download(url, dest) { + console.log(`Downloading ${url}`) + const res = await fetch(url, { redirect: 'follow' }) + if (!res.ok) { + throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`) + } + await pipeline(Readable.fromWeb(res.body), createWriteStream(dest)) +} + +async function sha256(file) { + const hash = createHash('sha256') + await pipeline(createReadStream(file), hash) + return hash.digest('hex') +} + +// The version subdir only needs what pyodide fetches via packageBaseUrl +// (wheels, their .metadata sidecars, and lib*.zip shared libraries). Skip +// everything else: +// - core runtime (pyodide.mjs / asm / stdlib / lock) lives one level up, +// copied from the npm package by webpack CopyPlugin. +// - *-tests.tar / test-*.zip: per-package test fixtures and pyodide's own +// test packages, not used at runtime. +// - console*.html, python / python.exe / python.bat / python_cli_entry.mjs, +// README.md: REPL UI, CLI shims, and docs. 
+const TAR_EXCLUDES = [ + 'pyodide.mjs', + 'pyodide.asm.js', + 'pyodide.asm.wasm', + 'python_stdlib.zip', + 'pyodide-lock.json', + '*-tests.tar', + 'test-*.zip', + 'console*.html', + 'python', + 'python.exe', + 'python.bat', + 'python_cli_entry.mjs', + 'README.md', +] + +async function extract(tarball, targetDir) { + console.log(`Extracting ${path.basename(tarball)}`) + // Tarball contains a top-level pyodide/ folder; strip it so contents land + // directly in targetDir. + await execFileAsync('tar', [ + '-xjf', + tarball, + '-C', + targetDir, + '--strip-components=1', + ...TAR_EXCLUDES.map(p => `--exclude=${p}`), + ]) +} + +async function main() { + try { + await stat(COMPLETE_MARKER) + console.log(`Pyodide ${PYODIDE_VERSION} already present at ${TARGET_DIR}`) + return + } catch (err) { + if (err.code !== 'ENOENT') throw err + } + + // A prior run may have left a partial install without the marker; wipe it + // so extraction starts from a clean directory. + await rm(TARGET_DIR, { recursive: true, force: true }) + await mkdir(TARGET_DIR, { recursive: true }) + + const tarballPath = path.join(TARGET_DIR, TARBALL_NAME) + try { + await download(RELEASE_URL, tarballPath) + const actual = await sha256(tarballPath) + if (actual !== EXPECTED_SHA256) { + throw new Error( + `SHA-256 mismatch for ${TARBALL_NAME}: expected ${EXPECTED_SHA256}, got ${actual}` + ) + } + await extract(tarballPath, TARGET_DIR) + await rm(tarballPath, { force: true }) + + const extracted = await readdir(TARGET_DIR) + if (!extracted.some(name => name.endsWith('.whl'))) { + throw new Error( + `Extraction did not produce any wheels under ${TARGET_DIR}` + ) + } + + await writeFile(COMPLETE_MARKER, '') + } catch (err) { + // Leave no partial install behind, so the next run starts clean. 
+ await rm(TARGET_DIR, { recursive: true, force: true }) + throw err + } + + console.log(`Pyodide ${PYODIDE_VERSION} ready at ${TARGET_DIR}`) +} + +main().catch(err => { + console.error(err) + process.exit(1) +}) diff --git a/services/web/test/frontend/features/ide-react/components/python-output-pane.spec.tsx b/services/web/test/frontend/features/ide-react/components/python-output-pane.spec.tsx index 655dad4ad0..1e5d8d12df 100644 --- a/services/web/test/frontend/features/ide-react/components/python-output-pane.spec.tsx +++ b/services/web/test/frontend/features/ide-react/components/python-output-pane.spec.tsx @@ -325,9 +325,48 @@ describe('', function () { }} providers={{ FileTreePathProvider, ProjectProvider }} > - + + + + + ) + + cy.findByRole('button', { name: 'Run Python code' }) + .should('not.be.disabled') + .click() + cy.findByText("ModuleNotFoundError: No module named 'tomli'").should( + 'not.exist' + ) + cy.findByText('hello from tomli').should('exist') + }) + + it('auto-installs python packages imported by the executing script', function () { + const executablePythonFileContents = [ + 'import tomli', + '', + "print(tomli.loads('greeting = \"hello from tomli\"')['greeting'])", + ].join('\n') + + const projectFiles = { + [pythonExecutableScript.filename]: executablePythonFileContents, + } + const ProjectProvider = makeProjectProvider(projectFiles) + + cy.mount( + executablePythonFileContents, + }, + currentDocumentId: pythonExecutableScript.file_id, + openDocName: pythonExecutableScript.filename, + }, + }} + providers={{ FileTreePathProvider, ProjectProvider }} + > + diff --git a/services/web/test/frontend/features/ide-react/unit/editor/pyodide-worker-client.spec.ts b/services/web/test/frontend/features/ide-react/unit/editor/pyodide-worker-client.spec.ts index 175b0b5027..814251e0dc 100644 --- a/services/web/test/frontend/features/ide-react/unit/editor/pyodide-worker-client.spec.ts +++ 
b/services/web/test/frontend/features/ide-react/unit/editor/pyodide-worker-client.spec.ts @@ -349,7 +349,6 @@ describe('PyodideWorkerClient', function () { { type: 'init', baseAssetPath: BASE_ASSET_PATH, - packageBaseUrl: undefined, }, ]) }) diff --git a/services/web/webpack.config.js b/services/web/webpack.config.js index 7bd7b2c247..75a75f663d 100644 --- a/services/web/webpack.config.js +++ b/services/web/webpack.config.js @@ -411,7 +411,9 @@ module.exports = { toType: 'dir', context: `${dictionariesDir}/dictionaries`, }, - // Copy Pyodide runtime assets from npm package for local serving. + // Copy Pyodide runtime assets from the npm package so the loader is + // always available. Python package wheels are fetched separately by + // scripts/fetch-pyodide-packages.mjs into the same directory on disk. { from: 'pyodide.mjs', to: 'js/libs/pyodide', diff --git a/yarn.lock b/yarn.lock index 99a2f7efaa..87e34425d0 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7437,7 +7437,7 @@ __metadata: prop-types: "npm:^15.7.2" pug: "npm:^3.0.3" pug-runtime: "npm:^3.0.1" - pyodide: "npm:^0.29.0" + pyodide: "npm:0.29.3" qrcode: "npm:^1.4.4" rate-limiter-flexible: "npm:^2.4.1" react: "npm:^18.3.1" @@ -28272,7 +28272,7 @@ __metadata: languageName: node linkType: hard -"pyodide@npm:^0.29.0": +"pyodide@npm:0.29.3": version: 0.29.3 resolution: "pyodide@npm:0.29.3" dependencies: