Use overleaf CDN for loading pyodide packages

GitOrigin-RevId: e17ff3387166421a546a9519786d77ba12cdffc4
This commit is contained in:
Domagoj Kriskovic
2026-04-29 12:32:50 +02:00
committed by Copybot
parent a46ca0705f
commit 9e677a2c1e
15 changed files with 203 additions and 39 deletions

View File

@@ -27,6 +27,11 @@ RUN update-ca-certificates
# the deps image is used for caching yarn workspaces focus # the deps image is used for caching yarn workspaces focus
FROM base AS deps-prod FROM base AS deps-prod
# Pyodide wheel bundle (~370 MB). Version + SHA-256 are pinned in the fetch
# script; keep that in sync with the pyodide dep in services/web/package.json.
COPY services/web/scripts/fetch-pyodide-packages.mjs /overleaf/services/web/scripts/fetch-pyodide-packages.mjs
RUN cd /overleaf/services/web && node scripts/fetch-pyodide-packages.mjs
COPY package.json yarn.lock .yarnrc.yml /overleaf/ COPY package.json yarn.lock .yarnrc.yml /overleaf/
COPY libraries/access-token-encryptor/package.json /overleaf/libraries/access-token-encryptor/package.json COPY libraries/access-token-encryptor/package.json /overleaf/libraries/access-token-encryptor/package.json
COPY libraries/eslint-plugin/package.json /overleaf/libraries/eslint-plugin/package.json COPY libraries/eslint-plugin/package.json /overleaf/libraries/eslint-plugin/package.json

View File

@@ -649,6 +649,7 @@ IMAGE_CACHE ?= $(IMAGE_REPO):cache-$(shell cat \
$(MONOREPO)/libraries/stream-utils/package.json \ $(MONOREPO)/libraries/stream-utils/package.json \
$(MONOREPO)/libraries/validation-tools/package.json \ $(MONOREPO)/libraries/validation-tools/package.json \
$(MONOREPO)/services/web/package.json \ $(MONOREPO)/services/web/package.json \
$(MONOREPO)/services/web/scripts/fetch-pyodide-packages.mjs \
$(MONOREPO)/patches/* \ $(MONOREPO)/patches/* \
| sha256sum | cut -d '-' -f1) | sha256sum | cut -d '-' -f1)

View File

@@ -16,8 +16,8 @@ const buildConfig = () => {
watch: false, watch: false,
}, },
{ {
directory: path.join(__dirname, '../fixtures/pyodide-packages'), directory: path.join(__dirname, '../../public/js/libs/pyodide'),
publicPath: '/pyodide-packages/', publicPath: '/__cypress/src/js/libs/pyodide/',
watch: false, watch: false,
}, },
], ],

View File

@@ -30,7 +30,6 @@ export type LifecycleCallback = (
export class PyodideWorkerClient { export class PyodideWorkerClient {
private worker: Worker private worker: Worker
private baseAssetPath: string private baseAssetPath: string
private packageBaseUrl: string | undefined
private createWorker: () => Worker private createWorker: () => Worker
private listening = false private listening = false
private destroyed = false private destroyed = false
@@ -41,13 +40,11 @@ export class PyodideWorkerClient {
constructor(options: { constructor(options: {
baseAssetPath: string baseAssetPath: string
packageBaseUrl?: string
createWorker: () => Worker createWorker: () => Worker
onOutput?: OutputCallback onOutput?: OutputCallback
onLifecycle?: LifecycleCallback onLifecycle?: LifecycleCallback
}) { }) {
this.baseAssetPath = options.baseAssetPath this.baseAssetPath = options.baseAssetPath
this.packageBaseUrl = options.packageBaseUrl
this.createWorker = options.createWorker this.createWorker = options.createWorker
this.outputCallback = options.onOutput ?? null this.outputCallback = options.onOutput ?? null
this.lifecycleCallback = options.onLifecycle ?? null this.lifecycleCallback = options.onLifecycle ?? null
@@ -57,7 +54,6 @@ export class PyodideWorkerClient {
this.queueMessage({ this.queueMessage({
type: 'init', type: 'init',
baseAssetPath: this.baseAssetPath, baseAssetPath: this.baseAssetPath,
packageBaseUrl: this.packageBaseUrl,
}) })
} }
@@ -101,7 +97,6 @@ export class PyodideWorkerClient {
this.queueMessage({ this.queueMessage({
type: 'init', type: 'init',
baseAssetPath: this.baseAssetPath, baseAssetPath: this.baseAssetPath,
packageBaseUrl: this.packageBaseUrl,
}) })
} }

View File

@@ -15,7 +15,6 @@ export type OutputFileData = {
export type InitRequest = { export type InitRequest = {
type: 'init' type: 'init'
baseAssetPath: string baseAssetPath: string
packageBaseUrl?: string
} }
export type RunCodeRequest = { export type RunCodeRequest = {

View File

@@ -15,7 +15,6 @@ type PyodideModule = typeof import('pyodide')
const PROJECT_FS_ROOT = '/project' const PROJECT_FS_ROOT = '/project'
const PROJECT_FS_PREFIX = `${PROJECT_FS_ROOT}/` const PROJECT_FS_PREFIX = `${PROJECT_FS_ROOT}/`
const PYODIDE_INDEX_PATH = 'js/libs/pyodide/' const PYODIDE_INDEX_PATH = 'js/libs/pyodide/'
const PYODIDE_CDN_URL = 'https://cdn.jsdelivr.net/pyodide/v'
function ensureDirectoryExists(fs: PyodideFS, filePath: string) { function ensureDirectoryExists(fs: PyodideFS, filePath: string) {
const directory = path.dirname(filePath) const directory = path.dirname(filePath)
@@ -51,7 +50,7 @@ function syncProjectFiles(fs: PyodideFS, files: ProjectFileData[]) {
} }
let pyodideModule: PyodideModule | null = null let pyodideModule: PyodideModule | null = null
let packageBaseUrlOverride: string | undefined let pyodideIndexUrl: string | undefined
async function loadPyodideModule(pyodideIndexUrl: string) { async function loadPyodideModule(pyodideIndexUrl: string) {
const runtimeModuleUrl = `${pyodideIndexUrl}pyodide.mjs` const runtimeModuleUrl = `${pyodideIndexUrl}pyodide.mjs`
@@ -70,12 +69,7 @@ async function loadPyodideModule(pyodideIndexUrl: string) {
} }
async function handleInit(msg: InitRequest) { async function handleInit(msg: InitRequest) {
const pyodideIndexUrl = new URL( pyodideIndexUrl = new URL(PYODIDE_INDEX_PATH, msg.baseAssetPath).toString()
PYODIDE_INDEX_PATH,
msg.baseAssetPath
).toString()
packageBaseUrlOverride = msg.packageBaseUrl
try { try {
pyodideModule = await loadPyodideModule(pyodideIndexUrl) pyodideModule = await loadPyodideModule(pyodideIndexUrl)
@@ -93,7 +87,7 @@ async function handleInit(msg: InitRequest) {
async function handleRunCode(msg: RunCodeRequest) { async function handleRunCode(msg: RunCodeRequest) {
const { fileId, executionId } = msg const { fileId, executionId } = msg
if (!pyodideModule) { if (!pyodideModule || !pyodideIndexUrl) {
self.postMessage({ self.postMessage({
type: 'output-line', type: 'output-line',
stream: 'stderr', stream: 'stderr',
@@ -114,9 +108,7 @@ async function handleRunCode(msg: RunCodeRequest) {
const instance = await pyodideModule.loadPyodide({ const instance = await pyodideModule.loadPyodide({
env: { MPLBACKEND: 'Agg' }, env: { MPLBACKEND: 'Agg' },
packageBaseUrl: packageBaseUrl: `${pyodideIndexUrl}${pyodideModule.version}/`,
packageBaseUrlOverride ??
`${PYODIDE_CDN_URL}${pyodideModule.version}/full/`,
}) })
const writtenPaths = new Set<string>() const writtenPaths = new Set<string>()

View File

@@ -43,7 +43,6 @@ export class PythonRunner {
readonly fileId: string readonly fileId: string
private client: PyodideWorkerClient | null = null private client: PyodideWorkerClient | null = null
private readonly baseAssetPath: string private readonly baseAssetPath: string
private readonly packageBaseUrl: string | undefined
private readonly createWorker: () => Worker private readonly createWorker: () => Worker
private readonly getExecutionContext: () => Promise<ExecutionContext | null> private readonly getExecutionContext: () => Promise<ExecutionContext | null>
private listeners = new Set<Listener>() private listeners = new Set<Listener>()
@@ -55,12 +54,10 @@ export class PythonRunner {
fileId: string, fileId: string,
baseAssetPath: string, baseAssetPath: string,
getExecutionContext: () => Promise<ExecutionContext | null>, getExecutionContext: () => Promise<ExecutionContext | null>,
createWorker: () => Worker, createWorker: () => Worker
packageBaseUrl?: string
) { ) {
this.fileId = fileId this.fileId = fileId
this.baseAssetPath = baseAssetPath this.baseAssetPath = baseAssetPath
this.packageBaseUrl = packageBaseUrl
this.createWorker = createWorker this.createWorker = createWorker
this.getExecutionContext = getExecutionContext this.getExecutionContext = getExecutionContext
} }
@@ -102,7 +99,6 @@ export class PythonRunner {
this.client = new PyodideWorkerClient({ this.client = new PyodideWorkerClient({
baseAssetPath: this.baseAssetPath, baseAssetPath: this.baseAssetPath,
packageBaseUrl: this.packageBaseUrl,
createWorker: this.createWorker, createWorker: this.createWorker,
onLifecycle: event => { onLifecycle: event => {
switch (event.type) { switch (event.type) {

View File

@@ -37,9 +37,9 @@ export const PythonExecutionContext = createContext<
PythonExecutionContextValue | undefined PythonExecutionContextValue | undefined
>(undefined) >(undefined)
export const PythonExecutionProvider: FC< export const PythonExecutionProvider: FC<PropsWithChildren> = ({
PropsWithChildren<{ packageBaseUrl?: string }> children,
> = ({ children, packageBaseUrl }) => { }) => {
const { openDocs } = useEditorManagerContext() const { openDocs } = useEditorManagerContext()
const { projectSnapshot } = useProjectContext() const { projectSnapshot } = useProjectContext()
const { pathInFolder } = useFileTreePathContext() const { pathInFolder } = useFileTreePathContext()
@@ -99,14 +99,13 @@ export const PythonExecutionProvider: FC<
fileId, fileId,
baseAssetPathRef.current, baseAssetPathRef.current,
() => getExecutionContext(fileId), () => getExecutionContext(fileId),
createPyodideWorker, createPyodideWorker
packageBaseUrl
) )
runner.init() runner.init()
runnersRef.current.set(fileId, runner) runnersRef.current.set(fileId, runner)
return runner return runner
}, },
[getExecutionContext, packageBaseUrl] [getExecutionContext]
) )
useEffect(() => { useEffect(() => {

View File

@@ -26,6 +26,7 @@
"nodemon": "node --watch app.mjs --watch-locales", "nodemon": "node --watch app.mjs --watch-locales",
"webpack": "webpack serve --config webpack.config.dev.js", "webpack": "webpack serve --config webpack.config.dev.js",
"webpack:production": "webpack --config webpack.config.prod.js", "webpack:production": "webpack --config webpack.config.prod.js",
"pyodide:fetch": "node scripts/fetch-pyodide-packages.mjs",
"webpack:profile": "webpack --config webpack.config.prod.js --profile --json > stats.json", "webpack:profile": "webpack --config webpack.config.prod.js --profile --json > stats.json",
"lint": "eslint --cache --cache-location ../../node_modules/.cache/eslint/ --max-warnings 0 --format unix --ext .js,.jsx,.mjs,.ts,.tsx .", "lint": "eslint --cache --cache-location ../../node_modules/.cache/eslint/ --max-warnings 0 --format unix --ext .js,.jsx,.mjs,.ts,.tsx .",
"lint:fix": "eslint --cache --cache-location ../../node_modules/.cache/eslint/ --fix --ext .js,.jsx,.mjs,.ts,.tsx .", "lint:fix": "eslint --cache --cache-location ../../node_modules/.cache/eslint/ --fix --ext .js,.jsx,.mjs,.ts,.tsx .",
@@ -374,7 +375,7 @@
"postcss": "^8.4.31", "postcss": "^8.4.31",
"postcss-loader": "^7.3.3", "postcss-loader": "^7.3.3",
"prop-types": "^15.7.2", "prop-types": "^15.7.2",
"pyodide": "^0.29.0", "pyodide": "0.29.3",
"qrcode": "^1.4.4", "qrcode": "^1.4.4",
"react": "^18.3.1", "react": "^18.3.1",
"react-bootstrap": "^2.10.10", "react-bootstrap": "^2.10.10",

View File

@@ -0,0 +1,136 @@
/* eslint-disable @overleaf/require-script-runner */
// This script doesn't work with ScriptRunner because it is run during the build process.
import { createReadStream, createWriteStream } from 'node:fs'
import { mkdir, readdir, rm, stat, writeFile } from 'node:fs/promises'
import { Readable } from 'node:stream'
import { pipeline } from 'node:stream/promises'
import { execFile } from 'node:child_process'
import { createHash } from 'node:crypto'
import { promisify } from 'node:util'
import path from 'node:path'
import { fileURLToPath } from 'node:url'
// Promise-returning wrapper around execFile; extract() uses it to run `tar`.
const execFileAsync = promisify(execFile)
// Two path segments up from this file's own path. With the script living in
// services/web/scripts/, this resolves to the services/web directory.
const SERVICE_WEB_DIR = path.resolve(fileURLToPath(import.meta.url), '../..')
// Pinned pyodide release tarball. Keep PYODIDE_VERSION in sync with the
// "pyodide" entry in services/web/package.json. When bumping, update both
// PYODIDE_VERSION and EXPECTED_SHA256 together; fetch the hash via:
// curl -sL https://api.github.com/repos/pyodide/pyodide/releases/tags/<ver> \
// | jq -r '.assets[] | select(.name=="pyodide-<ver>.tar.bz2") | .digest'
// (strip the "sha256:" prefix). Cross-check by downloading the tarball and
// running `shasum -a 256 pyodide-<ver>.tar.bz2`.
const PYODIDE_VERSION = '0.29.3'
// Lowercase hex digest; main() compares it against the hash of the downloaded
// tarball and aborts on any mismatch.
const EXPECTED_SHA256 =
'458e8ddbcbb6e21037d3237cd5c5146c451765bc738dfa2249ff34c5140331e4'
// Extraction target: a per-version subdirectory of the pyodide asset
// directory under public/.
const TARGET_DIR = path.join(
SERVICE_WEB_DIR,
'public/js/libs/pyodide',
PYODIDE_VERSION
)
// File name of the release asset; also used as the temporary download name
// inside TARGET_DIR.
const TARBALL_NAME = `pyodide-${PYODIDE_VERSION}.tar.bz2`
const RELEASE_URL = `https://github.com/pyodide/pyodide/releases/download/${PYODIDE_VERSION}/${TARBALL_NAME}`
// Empty marker file that main() writes as its last successful step. When it
// exists, main() treats the install as complete and returns early.
const COMPLETE_MARKER = path.join(TARGET_DIR, '.fetch-complete')
async function download(url, dest) {
console.log(`Downloading ${url}`)
const res = await fetch(url, { redirect: 'follow' })
if (!res.ok) {
throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`)
}
await pipeline(Readable.fromWeb(res.body), createWriteStream(dest))
}
/**
 * Computes the SHA-256 digest of a file's contents.
 *
 * The file is read as a stream and fed to the hash chunk by chunk, so large
 * files are never held in memory in full.
 *
 * @param file - Path of the file to hash.
 * @returns The digest as a lowercase hex string.
 * @throws Rejects with the underlying read error (e.g. ENOENT) if the file
 *   cannot be read.
 */
async function sha256(file) {
  const hasher = createHash('sha256')
  for await (const chunk of createReadStream(file)) {
    hasher.update(chunk)
  }
  return hasher.digest('hex')
}
// The version subdir only needs what pyodide fetches via packageBaseUrl
// (wheels, their .metadata sidecars, and lib*.zip shared libraries). Skip
// everything else:
// - core runtime (pyodide.mjs / asm / stdlib / lock) lives one level up,
// copied from the npm package by webpack CopyPlugin.
// - *-tests.tar / test-*.zip: per-package test fixtures and pyodide's own
// test packages, not used at runtime.
// - console*.html, python / python.exe / python.bat / python_cli_entry.mjs,
// README.md: REPL UI, CLI shims, and docs.
// Each entry is handed to tar verbatim as an `--exclude=<pattern>` option by
// extract(), so entries may be literal names or tar glob patterns.
const TAR_EXCLUDES = [
'pyodide.mjs',
'pyodide.asm.js',
'pyodide.asm.wasm',
'python_stdlib.zip',
'pyodide-lock.json',
'*-tests.tar',
'test-*.zip',
'console*.html',
'python',
'python.exe',
'python.bat',
'python_cli_entry.mjs',
'README.md',
]
/**
 * Unpacks a bzip2-compressed tarball into `targetDir` using the system `tar`.
 *
 * The archive's single top-level folder is stripped so its contents land
 * directly in `targetDir`, and every pattern in TAR_EXCLUDES is skipped.
 *
 * @param tarball - Path to the .tar.bz2 archive.
 * @param targetDir - Existing directory to extract into.
 * @throws Rejects if `tar` cannot be spawned or exits with a non-zero status.
 */
async function extract(tarball, targetDir) {
  console.log(`Extracting ${path.basename(tarball)}`)

  const excludeFlags = []
  for (const pattern of TAR_EXCLUDES) {
    excludeFlags.push(`--exclude=${pattern}`)
  }

  // Tarball contains a top-level pyodide/ folder; --strip-components=1 drops
  // it so files are written straight into targetDir.
  const tarArgs = ['-xjf', tarball, '-C', targetDir, '--strip-components=1']
  await execFileAsync('tar', tarArgs.concat(excludeFlags))
}
/**
 * Ensures the pinned pyodide package bundle is present in TARGET_DIR.
 *
 * Steps: skip if the completion marker exists; otherwise wipe and recreate
 * TARGET_DIR, download the release tarball, verify its SHA-256, extract it,
 * delete the tarball, confirm at least one wheel was produced, and finally
 * write the completion marker. Any failure removes TARGET_DIR again so a
 * later run starts from scratch.
 *
 * @throws Rethrows any download, checksum, extraction or file-system error.
 */
async function main() {
  let markerPresent = true
  try {
    await stat(COMPLETE_MARKER)
  } catch (err) {
    // Only "marker missing" means we should (re)install; anything else is a
    // real file-system problem.
    if (err.code !== 'ENOENT') throw err
    markerPresent = false
  }
  if (markerPresent) {
    console.log(`Pyodide ${PYODIDE_VERSION} already present at ${TARGET_DIR}`)
    return
  }

  // A previous run may have died before writing the marker; start from an
  // empty directory so stale files cannot mix with the new extraction.
  await rm(TARGET_DIR, { recursive: true, force: true })
  await mkdir(TARGET_DIR, { recursive: true })

  const archivePath = path.join(TARGET_DIR, TARBALL_NAME)
  try {
    await download(RELEASE_URL, archivePath)

    const digest = await sha256(archivePath)
    if (digest !== EXPECTED_SHA256) {
      throw new Error(
        `SHA-256 mismatch for ${TARBALL_NAME}: expected ${EXPECTED_SHA256}, got ${digest}`
      )
    }

    await extract(archivePath, TARGET_DIR)
    await rm(archivePath, { force: true })

    const entries = await readdir(TARGET_DIR)
    const hasWheel = entries.some(entry => entry.endsWith('.whl'))
    if (!hasWheel) {
      throw new Error(
        `Extraction did not produce any wheels under ${TARGET_DIR}`
      )
    }

    // Written last: its presence is what marks the install as complete.
    await writeFile(COMPLETE_MARKER, '')
  } catch (err) {
    // Never leave a half-finished install on disk.
    await rm(TARGET_DIR, { recursive: true, force: true })
    throw err
  }

  console.log(`Pyodide ${PYODIDE_VERSION} ready at ${TARGET_DIR}`)
}
// Script entry point. Any failure is logged and turned into a non-zero exit
// status so the invoking command (e.g. the Docker `RUN` step or the
// `pyodide:fetch` package script) fails instead of continuing without wheels.
main().catch(err => {
console.error(err)
process.exit(1)
})

View File

@@ -325,9 +325,48 @@ describe('<PythonOutputPane />', function () {
}} }}
providers={{ FileTreePathProvider, ProjectProvider }} providers={{ FileTreePathProvider, ProjectProvider }}
> >
<PythonExecutionProvider <PythonExecutionProvider>
packageBaseUrl={`${window.location.origin}/pyodide-packages/`} <PythonOutputPane />
> </PythonExecutionProvider>
</EditorProviders>
)
cy.findByRole('button', { name: 'Run Python code' })
.should('not.be.disabled')
.click()
cy.findByText("ModuleNotFoundError: No module named 'tomli'").should(
'not.exist'
)
cy.findByText('hello from tomli').should('exist')
})
it('auto-installs python packages imported by the executing script', function () {
const executablePythonFileContents = [
'import tomli',
'',
"print(tomli.loads('greeting = \"hello from tomli\"')['greeting'])",
].join('\n')
const projectFiles = {
[pythonExecutableScript.filename]: executablePythonFileContents,
}
const ProjectProvider = makeProjectProvider(projectFiles)
cy.mount(
<EditorProviders
scope={{
editor: {
sharejs_doc: {
doc_id: pythonExecutableScript.file_id,
getSnapshot: () => executablePythonFileContents,
},
currentDocumentId: pythonExecutableScript.file_id,
openDocName: pythonExecutableScript.filename,
},
}}
providers={{ FileTreePathProvider, ProjectProvider }}
>
<PythonExecutionProvider>
<PythonOutputPane /> <PythonOutputPane />
</PythonExecutionProvider> </PythonExecutionProvider>
</EditorProviders> </EditorProviders>

View File

@@ -349,7 +349,6 @@ describe('PyodideWorkerClient', function () {
{ {
type: 'init', type: 'init',
baseAssetPath: BASE_ASSET_PATH, baseAssetPath: BASE_ASSET_PATH,
packageBaseUrl: undefined,
}, },
]) ])
}) })

View File

@@ -411,7 +411,9 @@ module.exports = {
toType: 'dir', toType: 'dir',
context: `${dictionariesDir}/dictionaries`, context: `${dictionariesDir}/dictionaries`,
}, },
// Copy Pyodide runtime assets from npm package for local serving. // Copy Pyodide runtime assets from the npm package so the loader is
// always available. Python package wheels are fetched separately by
// scripts/fetch-pyodide-packages.mjs into the same directory on disk.
{ {
from: 'pyodide.mjs', from: 'pyodide.mjs',
to: 'js/libs/pyodide', to: 'js/libs/pyodide',

View File

@@ -7437,7 +7437,7 @@ __metadata:
prop-types: "npm:^15.7.2" prop-types: "npm:^15.7.2"
pug: "npm:^3.0.3" pug: "npm:^3.0.3"
pug-runtime: "npm:^3.0.1" pug-runtime: "npm:^3.0.1"
pyodide: "npm:^0.29.0" pyodide: "npm:0.29.3"
qrcode: "npm:^1.4.4" qrcode: "npm:^1.4.4"
rate-limiter-flexible: "npm:^2.4.1" rate-limiter-flexible: "npm:^2.4.1"
react: "npm:^18.3.1" react: "npm:^18.3.1"
@@ -28272,7 +28272,7 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"pyodide@npm:^0.29.0": "pyodide@npm:0.29.3":
version: 0.29.3 version: 0.29.3
resolution: "pyodide@npm:0.29.3" resolution: "pyodide@npm:0.29.3"
dependencies: dependencies: