Linked URL: add URL sanitization and path normalization

This commit is contained in:
yu-i-i
2026-01-28 03:08:45 +01:00
parent 1e67fa3e9f
commit 90f8a85459
6 changed files with 54 additions and 30 deletions

View File

@@ -230,6 +230,7 @@
"services/history-v1",
"services/idp",
"services/latexqc",
"services/linked-url-proxy",
"services/notifications",
"services/project-history",
"services/real-time",

View File

@@ -2,7 +2,7 @@
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
FROM node:20.18.2 AS base
FROM node:22.18.0 AS base
WORKDIR /overleaf/services/linked-url-proxy
@@ -13,16 +13,20 @@ RUN mkdir /home/node/.config && chown node:node /home/node/.config
FROM base AS app
COPY package.json package-lock.json /overleaf/
COPY libraries/fetch-utils/package.json /overleaf/libraries/fetch-utils/package.json
COPY libraries/logger/package.json /overleaf/libraries/logger/package.json
COPY libraries/metrics/package.json /overleaf/libraries/metrics/package.json
COPY libraries/o-error/package.json /overleaf/libraries/o-error/package.json
COPY libraries/settings/package.json /overleaf/libraries/settings/package.json
COPY services/linked-url-proxy/package.json /overleaf/services/linked-url-proxy/package.json
COPY patches/ /overleaf/patches/
RUN cd /overleaf && npm ci --quiet
COPY libraries/fetch-utils/ /overleaf/libraries/fetch-utils/
COPY libraries/logger/ /overleaf/libraries/logger/
COPY libraries/metrics/ /overleaf/libraries/metrics/
COPY libraries/o-error/ /overleaf/libraries/o-error/
COPY libraries/settings/ /overleaf/libraries/settings/
COPY services/linked-url-proxy/ /overleaf/services/linked-url-proxy/

View File

@@ -1,4 +1,6 @@
import dns from 'dns/promises'
import { sanitizeUrl } from 'strict-url-sanitise'
import normalizeUrlPath from 'als-normalize-urlpath'
import ipaddr from 'ipaddr.js'
import { URL } from 'node:url'
import { Transform } from 'node:stream'
@@ -18,15 +20,7 @@ function isBlockedIp(ipStr, targetUrl) {
}
const range = addr.range()
if ([
'loopback',
'private',
'linkLocal',
'multicast',
'reserved',
'broadcast',
'unspecified'
].includes(range)) {
if (!['unicast'].includes(addr.range())) {
return true
}
@@ -44,7 +38,7 @@ function isBlockedIp(ipStr, targetUrl) {
return false
}
async function validateSourceUrl(hostname, targetUrl) {
async function checkUrlAccess(hostname, targetUrl) {
const records = await dns.lookup(hostname, { all: true }).catch(() => [])
if (!records.length) {
const err = new Error(`DNS lookup failed for ${hostname}`)
@@ -62,22 +56,40 @@ async function validateSourceUrl(hostname, targetUrl) {
}
}
async function fetchValidated(urlStr, redirectCount = 0) {
async function validateAndFetch(rawUrl, redirectCount = 0) {
if (redirectCount > Settings.maxRedirects) {
const err = new Error('Too many redirects')
err.info = { status: 421 }
throw err
}
const url = new URL(urlStr)
const sanitizedUrl = sanitizeUrl(rawUrl)
if (!sanitizedUrl) {
const err = new Error(`Invalid or unsafe URL: ${rawUrl}`)
err.info = { status: 400 }
throw err
}
const url = new URL(sanitizedUrl)
if (!['http:', 'https:'].includes(url.protocol)) {
const err = new Error(`${url.protocol} protocol is not allowed`)
err.info = { status: 400 }
throw err
}
// Validate DNS and blocked IPs
await validateSourceUrl(url.hostname, urlStr)
const normalizedPath = normalizeUrlPath(url.pathname).pathname
if (!normalizedPath) {
const err = new Error(`Invalid or unsafe URL path: ${url.pathname}`)
err.info = { status: 400 }
throw err
}
const normalizedUrl = url.toString()
// check DNS and allowed resources
await checkUrlAccess(url.hostname, normalizedUrl)
const opts = {
redirect: 'manual',
@@ -86,7 +98,7 @@ async function fetchValidated(urlStr, redirectCount = 0) {
}
try {
const { stream, response } = await fetchStreamWithResponse(urlStr, opts)
const { stream, response } = await fetchStreamWithResponse(normalizedUrl, opts)
const contentLengthHeader = response.headers.get('content-length')
if (contentLengthHeader) {
@@ -112,8 +124,8 @@ async function fetchValidated(urlStr, redirectCount = 0) {
if (status >= 300 && status < 400) {
const location = err.response.headers.get('Location')
if (location) {
const nextUrl = new URL(location, url).toString()
return fetchValidated(nextUrl, redirectCount + 1)
const nextUrl = new URL(location, normalizedUrl).toString()
return validateAndFetch(nextUrl, redirectCount + 1)
} else {
const e = new Error('Redirect response missing Location header')
e.info = { status: 421 }
@@ -141,14 +153,14 @@ async function proxy(req, res) {
return
}
const { stream: upstreamStream, response, headers } = await fetchValidated(targetUrl)
const { stream: upstreamStream, response, headers } = await validateAndFetch(targetUrl)
res.statusCode = response.status || 200
res.setHeader('Content-Type', headers['content-type'] || 'application/octet-stream')
res.setHeader('Cache-Control', 'no-store')
function onError(err) {
logger.warn({ err, url: req.url }, 'linked-url-proxy request failed')
logger.info({ err, url: req.url }, 'linked-url-proxy request failed')
try { upstreamStream.destroy() } catch (_) {}
if (!res.headersSent) {
let body = `Error: ${err?.message ?? String(err)}`
@@ -163,11 +175,9 @@ async function proxy(req, res) {
upstreamStream.pipe(res)
} catch (err) {
logger.warn({ err, url: req.url }, 'linked-url-proxy request failed')
let status = err.info.status
const status = err.info?.status || 500
logger.info({ linkedUrl: err.message, status, url: req.url }, 'linked-url-proxy request failed')
let body = `Error: ${err.message || String(err)}`
try {
res.writeHead(status, { 'Content-Type': 'text/plain' })
res.end(body)

View File

@@ -2,18 +2,24 @@
"name": "@overleaf/linked-url-proxy",
"description": "An API for providing linked url proxy",
"private": true,
"type": "module",
"main": "app.mjs",
"scripts": {
"start": "node app.mjs"
"start": "node app.mjs",
"nodemon": "node --watch app.mjs"
},
"version": "0.1.0",
"version": "0.1.1",
"dependencies": {
"@overleaf/settings": "*",
"@overleaf/logger": "*",
"@overleaf/metrics": "*",
"async": "^3.2.5",
"express": "^4.21.2"
"ipaddr.js": "^1.9.1"
"express": "^4.22.1",
"ipaddr.js": "^2.1.0",
"als-normalize-urlpath": "^2.3.0",
"strict-url-sanitise": "^0.0.1"
},
"devDependencies": {
"als-normalize-urlpath": "^2.3.0",
"strict-url-sanitise": "^0.0.1"
}
}

View File

@@ -1,7 +1,7 @@
{
"extends": "../../tsconfig.backend.json",
"include": [
"app.js",
"app.mjs",
"app.ts",
"app/js/**/*",
"benchmarks/**/*",

View File

@@ -74,6 +74,7 @@
"last 1 year",
"safari > 14"
],
"dependencies": {
"@ai-sdk/google-vertex": "^4.0.113",
"@ai-sdk/mcp": "patch:@ai-sdk/mcp@npm%3A1.0.37#~/.yarn/patches/@ai-sdk-mcp-npm-1.0.37-8cd89b8972.patch",
@@ -109,6 +110,7 @@
"accepts": "^1.3.7",
"ai": "^6.0.169",
"ajv": "^8.12.0",
"als-normalize-urlpath": "^2.3.0",
"archiver": "^5.3.0",
"async": "^3.2.5",
"base-x": "^4.0.1",
@@ -182,6 +184,7 @@
"request": "2.88.2",
"requestretry": "7.1.0",
"sanitize-html": "^2.8.1",
"strict-url-sanitise": "^0.0.1",
"stripe": "^18.4.0",
"tough-cookie": "^4.0.0",
"tsscmp": "^1.0.6",