mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-23 17:19:37 +02:00
Merge pull request #27246 from overleaf/jpa-hotfix-5-5-3
[server-pro] add hotfix 5.5.3 GitOrigin-RevId: 6bd266afb8f5ba622224b6095204ee6801c05a44
This commit is contained in:
7
server-ce/bin/import_pr_patch.sh
Executable file
7
server-ce/bin/import_pr_patch.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
for PR in "$@"; do
|
||||
gh pr diff "$PR" --patch \
|
||||
| node -e 'const blob = require("fs").readFileSync("/dev/stdin", "utf-8"); console.log(blob.replace(/From [\s\S]+?\d+ files? changed,.+/g, ""))' \
|
||||
> "pr_$PR.patch"
|
||||
done
|
||||
25
server-ce/hotfix/5.5.3/Dockerfile
Normal file
25
server-ce/hotfix/5.5.3/Dockerfile
Normal file
@@ -0,0 +1,25 @@
|
||||
FROM sharelatex/sharelatex:5.5.2
|
||||
|
||||
# ../../bin/import_pr_patch.sh 27147 27173 27230 27240 27249 27257 27273 27397
|
||||
# Remove CE tests
|
||||
# Remove tests
|
||||
# Remove cloudbuild changes
|
||||
# Remove SaaS changes
|
||||
# Fixup package.json and toolbar-items.tsx
|
||||
# Fix cron paths
|
||||
COPY *.patch .
|
||||
RUN --mount=type=cache,target=/root/.cache \
|
||||
--mount=type=cache,target=/root/.npm \
|
||||
--mount=type=cache,target=/overleaf/services/web/node_modules/.cache,id=server-ce-webpack-cache \
|
||||
--mount=type=tmpfs,target=/tmp true \
|
||||
&& bash -ec 'for p in *.patch; do echo "=== Applying $p ==="; patch -p1 < "$p" && rm $p; done' \
|
||||
&& npm audit --audit-level=high \
|
||||
&& node genScript compile | bash \
|
||||
&& npm prune --omit=dev \
|
||||
&& apt remove -y linux-libc-dev
|
||||
|
||||
# ../../bin/import_pr_patch.sh 27476
|
||||
# Remove tests
|
||||
# Remove SaaS changes
|
||||
COPY pr_27476.patch-stage-2 .
|
||||
RUN patch -p1 < pr_27476.patch-stage-2 && rm pr_27476.patch-stage-2
|
||||
54
server-ce/hotfix/5.5.3/NOTES.md
Normal file
54
server-ce/hotfix/5.5.3/NOTES.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# Get the base container running
|
||||
docker build -t base .
|
||||
|
||||
CONTAINER_NAME=new
|
||||
|
||||
# Start the container
|
||||
docker run -t -i --entrypoint /bin/bash --name $CONTAINER_NAME base
|
||||
|
||||
# Clean any existing directories
|
||||
rm -rf /tmp/{a,b}
|
||||
|
||||
# Take snapshot of initial container
|
||||
mkdir /tmp/a ; docker export $CONTAINER_NAME | tar --exclude node_modules -x -C /tmp/a --strip-components=1 overleaf
|
||||
|
||||
# In the container, run the following commands
|
||||
docker exec -i $CONTAINER_NAME /bin/bash <<'EOF'
|
||||
npm install -g json
|
||||
json -I -f package.json -c 'this.overrides["swagger-tools"].multer="2.0.2"'
|
||||
json -I -f package.json -c 'this.overrides["request@2.88.2"]["form-data"]="2.5.5"'
|
||||
json -I -f package.json -c 'this.overrides["superagent@7.1.6"] ??= {}'
|
||||
json -I -f package.json -c 'this.overrides["superagent@7.1.6"]["form-data"]="4.0.4"'
|
||||
json -I -f package.json -c 'this.overrides["superagent@3.8.3"] ??= {}'
|
||||
json -I -f package.json -c 'this.overrides["superagent@3.8.3"]["form-data"]="2.5.5"'
|
||||
|
||||
npm uninstall -w libraries/metrics @google-cloud/opentelemetry-cloud-trace-exporter @google-cloud/profiler
|
||||
npm uninstall -w libraries/logger @google-cloud/logging-bunyan
|
||||
npm uninstall -w services/web @slack/webhook contentful @contentful/rich-text-types @contentful/rich-text-html-renderer
|
||||
npm uninstall -w services/history-v1 @google-cloud/secret-manager
|
||||
|
||||
npm uninstall -w services/web "@node-saml/passport-saml"
|
||||
npm install -w services/web "@node-saml/passport-saml@^5.1.0"
|
||||
|
||||
npm uninstall -w services/web multer
|
||||
npm install -w services/web "multer@2.0.2"
|
||||
|
||||
npm uninstall -w services/history-v1 swagger-tools
|
||||
npm install -w services/history-v1 swagger-tools@0.10.4
|
||||
|
||||
npm uninstall -w services/clsi request
|
||||
npm install -w services/clsi request@2.88.2
|
||||
npm install
|
||||
|
||||
npm audit --audit-level=high
|
||||
EOF
|
||||
|
||||
# Take snapshot of final container
|
||||
mkdir /tmp/b ; docker export $CONTAINER_NAME | tar --exclude node_modules -x -C /tmp/b --strip-components=1 overleaf
|
||||
|
||||
# Generate the diff between the two snapshots, excluding node_modules directories
|
||||
# The sec- prefix makes the patch sort after the pr_* patches, so it is applied after them (the Dockerfile applies *.patch in glob order).
|
||||
(cd /tmp ; diff -u -x 'node_modules' -r a/ b/) > sec-npm.patch
|
||||
|
||||
# In the Dockerfile we also need to remove linux-libc-dev (done at the end of the RUN step)
|
||||
apt remove -y linux-libc-dev
|
||||
27
server-ce/hotfix/5.5.3/multer.patch
Normal file
27
server-ce/hotfix/5.5.3/multer.patch
Normal file
@@ -0,0 +1,27 @@
|
||||
commit 43d0476e489cdf8e2e7261eb419810140d252a6d
|
||||
Author: Andrew Rumble <andrew.rumble@overleaf.com>
|
||||
Date: Fri Jul 25 12:18:26 2025 +0100
|
||||
|
||||
Add patch for multer 2.0.2
|
||||
|
||||
Co-authored-by: Ersun Warncke <ersun.warncke@overleaf.com>
|
||||
|
||||
diff --git a/patches/multer+2.0.2.patch b/patches/multer+2.0.2.patch
|
||||
new file mode 100644
|
||||
index 00000000000..f9959effe15
|
||||
--- /dev/null
|
||||
+++ b/patches/multer+2.0.2.patch
|
||||
@@ -0,0 +1,13 @@
|
||||
+diff --git a/node_modules/multer/lib/make-middleware.js b/node_modules/multer/lib/make-middleware.js
|
||||
+index 260dcb4..895b4b2 100644
|
||||
+--- a/node_modules/multer/lib/make-middleware.js
|
||||
++++ b/node_modules/multer/lib/make-middleware.js
|
||||
+@@ -113,7 +113,7 @@ function makeMiddleware (setup) {
|
||||
+ if (fieldname == null) return abortWithCode('MISSING_FIELD_NAME')
|
||||
+
|
||||
+ // don't attach to the files object, if there is no file
|
||||
+- if (!filename) return fileStream.resume()
|
||||
++ if (!filename) filename = 'undefined'
|
||||
+
|
||||
+ // Work around bug in Busboy (https://github.com/mscdex/busboy/issues/6)
|
||||
+ if (limits && Object.prototype.hasOwnProperty.call(limits, 'fieldNameSize')) {
|
||||
351
server-ce/hotfix/5.5.3/pr_27147.patch
Normal file
351
server-ce/hotfix/5.5.3/pr_27147.patch
Normal file
@@ -0,0 +1,351 @@
|
||||
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index ba3e0d43598e..feb4612ddc23 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -33,7 +33,6 @@ import {
|
||||
makeProjectKey,
|
||||
} from '../lib/blob_store/index.js'
|
||||
import { backedUpBlobs as backedUpBlobsCollection, db } from '../lib/mongodb.js'
|
||||
-import filestorePersistor from '../lib/persistor.js'
|
||||
import commandLineArgs from 'command-line-args'
|
||||
import readline from 'node:readline'
|
||||
|
||||
@@ -179,6 +178,37 @@ const STREAM_HIGH_WATER_MARK = parseInt(
|
||||
const LOGGING_INTERVAL = parseInt(process.env.LOGGING_INTERVAL || '60000', 10)
|
||||
const SLEEP_BEFORE_EXIT = parseInt(process.env.SLEEP_BEFORE_EXIT || '1000', 10)
|
||||
|
||||
+// Filestore endpoint location, the port is always hardcoded
|
||||
+const FILESTORE_HOST = process.env.FILESTORE_HOST || '127.0.0.1'
|
||||
+const FILESTORE_PORT = process.env.FILESTORE_PORT || '3009'
|
||||
+
|
||||
+async function fetchFromFilestore(projectId, fileId) {
|
||||
+ const url = `http://${FILESTORE_HOST}:${FILESTORE_PORT}/project/${projectId}/file/${fileId}`
|
||||
+ const response = await fetch(url)
|
||||
+ if (!response.ok) {
|
||||
+ if (response.status === 404) {
|
||||
+ throw new NotFoundError('file not found in filestore', {
|
||||
+ status: response.status,
|
||||
+ })
|
||||
+ }
|
||||
+ const body = await response.text()
|
||||
+ throw new OError('fetchFromFilestore failed', {
|
||||
+ projectId,
|
||||
+ fileId,
|
||||
+ status: response.status,
|
||||
+ body,
|
||||
+ })
|
||||
+ }
|
||||
+ if (!response.body) {
|
||||
+ throw new OError('fetchFromFilestore response has no body', {
|
||||
+ projectId,
|
||||
+ fileId,
|
||||
+ status: response.status,
|
||||
+ })
|
||||
+ }
|
||||
+ return response.body
|
||||
+}
|
||||
+
|
||||
const projectsCollection = db.collection('projects')
|
||||
/** @type {ProjectsCollection} */
|
||||
const typedProjectsCollection = db.collection('projects')
|
||||
@@ -348,8 +378,7 @@ async function processFile(entry, filePath) {
|
||||
} catch (err) {
|
||||
if (gracefulShutdownInitiated) throw err
|
||||
if (err instanceof NotFoundError) {
|
||||
- const { bucketName } = OError.getFullInfo(err)
|
||||
- if (bucketName === USER_FILES_BUCKET_NAME && !RETRY_FILESTORE_404) {
|
||||
+ if (!RETRY_FILESTORE_404) {
|
||||
throw err // disable retries for not found in filestore bucket case
|
||||
}
|
||||
}
|
||||
@@ -416,10 +445,8 @@ async function processFileOnce(entry, filePath) {
|
||||
}
|
||||
|
||||
STATS.readFromGCSCount++
|
||||
- const src = await filestorePersistor.getObjectStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId}/${fileId}`
|
||||
- )
|
||||
+ // make a fetch request to filestore itself
|
||||
+ const src = await fetchFromFilestore(projectId, fileId)
|
||||
const dst = fs.createWriteStream(filePath, {
|
||||
highWaterMark: STREAM_HIGH_WATER_MARK,
|
||||
})
|
||||
@@ -1327,14 +1354,21 @@ async function processDeletedProjects() {
|
||||
}
|
||||
|
||||
async function main() {
|
||||
+ console.log('Starting project file backup...')
|
||||
await loadGlobalBlobs()
|
||||
+ console.log('Loaded global blobs:', GLOBAL_BLOBS.size)
|
||||
if (PROJECT_IDS_FROM) {
|
||||
+ console.log(
|
||||
+ `Processing projects from file: ${PROJECT_IDS_FROM}, this may take a while...`
|
||||
+ )
|
||||
await processProjectsFromFile()
|
||||
} else {
|
||||
if (PROCESS_NON_DELETED_PROJECTS) {
|
||||
+ console.log('Processing non-deleted projects...')
|
||||
await processNonDeletedProjects()
|
||||
}
|
||||
if (PROCESS_DELETED_PROJECTS) {
|
||||
+ console.log('Processing deleted projects...')
|
||||
await processDeletedProjects()
|
||||
}
|
||||
}
|
||||
diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
index fd39369a7189..4e697b8bec2c 100644
|
||||
--- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
+++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
@@ -15,7 +15,6 @@ import { execFile } from 'node:child_process'
|
||||
import chai, { expect } from 'chai'
|
||||
import chaiExclude from 'chai-exclude'
|
||||
import config from 'config'
|
||||
-import ObjectPersistor from '@overleaf/object-persistor'
|
||||
import { WritableBuffer } from '@overleaf/stream-utils'
|
||||
import {
|
||||
backupPersistor,
|
||||
@@ -27,6 +26,9 @@ import {
|
||||
makeProjectKey,
|
||||
} from '../../../../storage/lib/blob_store/index.js'
|
||||
|
||||
+import express from 'express'
|
||||
+import bodyParser from 'body-parser'
|
||||
+
|
||||
chai.use(chaiExclude)
|
||||
const TIMEOUT = 20 * 1_000
|
||||
|
||||
@@ -36,15 +38,60 @@ const { tieringStorageClass } = config.get('backupPersistor')
|
||||
const projectsCollection = db.collection('projects')
|
||||
const deletedProjectsCollection = db.collection('deletedProjects')
|
||||
|
||||
-const FILESTORE_PERSISTOR = ObjectPersistor({
|
||||
- backend: 'gcs',
|
||||
- gcs: {
|
||||
- endpoint: {
|
||||
- apiEndpoint: process.env.GCS_API_ENDPOINT,
|
||||
- projectId: process.env.GCS_PROJECT_ID,
|
||||
- },
|
||||
- },
|
||||
-})
|
||||
+class MockFilestore {
|
||||
+ constructor() {
|
||||
+ this.host = process.env.FILESTORE_HOST || '127.0.0.1'
|
||||
+ this.port = process.env.FILESTORE_PORT || 3009
|
||||
+ // create a server listening on this.host and this.port
|
||||
+ this.files = {}
|
||||
+
|
||||
+ this.app = express()
|
||||
+ this.app.use(bodyParser.json())
|
||||
+ this.app.use(bodyParser.urlencoded({ extended: true }))
|
||||
+
|
||||
+ this.app.get('/project/:projectId/file/:fileId', (req, res) => {
|
||||
+ const { projectId, fileId } = req.params
|
||||
+ const content = this.files[projectId]?.[fileId]
|
||||
+ if (!content) return res.status(404).end()
|
||||
+ res.status(200).end(content)
|
||||
+ })
|
||||
+ }
|
||||
+
|
||||
+ start() {
|
||||
+ // reset stored files
|
||||
+ this.files = {}
|
||||
+ // start the server
|
||||
+ if (this.serverPromise) {
|
||||
+ return this.serverPromise
|
||||
+ } else {
|
||||
+ this.serverPromise = new Promise((resolve, reject) => {
|
||||
+ this.server = this.app.listen(this.port, this.host, err => {
|
||||
+ if (err) return reject(err)
|
||||
+ resolve()
|
||||
+ })
|
||||
+ })
|
||||
+ return this.serverPromise
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ addFile(projectId, fileId, fileContent) {
|
||||
+ if (!this.files[projectId]) {
|
||||
+ this.files[projectId] = {}
|
||||
+ }
|
||||
+ this.files[projectId][fileId] = fileContent
|
||||
+ }
|
||||
+
|
||||
+ deleteObject(projectId, fileId) {
|
||||
+ if (this.files[projectId]) {
|
||||
+ delete this.files[projectId][fileId]
|
||||
+ if (Object.keys(this.files[projectId]).length === 0) {
|
||||
+ delete this.files[projectId]
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+const mockFilestore = new MockFilestore()
|
||||
|
||||
/**
|
||||
* @param {ObjectId} objectId
|
||||
@@ -472,67 +519,36 @@ describe('back_fill_file_hash script', function () {
|
||||
}
|
||||
|
||||
async function populateFilestore() {
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId0}/${fileId0}`,
|
||||
- Stream.Readable.from([fileId0.toString()])
|
||||
- )
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId0}/${fileId6}`,
|
||||
- Stream.Readable.from([fileId6.toString()])
|
||||
- )
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId0}/${fileId7}`,
|
||||
- Stream.Readable.from([contentFile7])
|
||||
- )
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId1}/${fileId1}`,
|
||||
- Stream.Readable.from([fileId1.toString()])
|
||||
- )
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId2}/${fileId2}`,
|
||||
- Stream.Readable.from([fileId2.toString()])
|
||||
- )
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId3}/${fileId3}`,
|
||||
- Stream.Readable.from([fileId3.toString()])
|
||||
- )
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId3}/${fileId10}`,
|
||||
+ await mockFilestore.addFile(projectId0, fileId0, fileId0.toString())
|
||||
+ await mockFilestore.addFile(projectId0, fileId6, fileId6.toString())
|
||||
+ await mockFilestore.addFile(projectId0, fileId7, contentFile7)
|
||||
+ await mockFilestore.addFile(projectId1, fileId1, fileId1.toString())
|
||||
+ await mockFilestore.addFile(projectId2, fileId2, fileId2.toString())
|
||||
+ await mockFilestore.addFile(projectId3, fileId3, fileId3.toString())
|
||||
+ await mockFilestore.addFile(
|
||||
+ projectId3,
|
||||
+ fileId10,
|
||||
// fileId10 is dupe of fileId3
|
||||
- Stream.Readable.from([fileId3.toString()])
|
||||
+ fileId3.toString()
|
||||
)
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId3}/${fileId11}`,
|
||||
+ await mockFilestore.addFile(
|
||||
+ projectId3,
|
||||
+ fileId11,
|
||||
// fileId11 is dupe of fileId3
|
||||
- Stream.Readable.from([fileId3.toString()])
|
||||
- )
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectIdDeleted0}/${fileId4}`,
|
||||
- Stream.Readable.from([fileId4.toString()])
|
||||
+ fileId3.toString()
|
||||
)
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectIdDeleted1}/${fileId5}`,
|
||||
- Stream.Readable.from([fileId5.toString()])
|
||||
- )
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectIdBadFileTree3}/${fileId9}`,
|
||||
- Stream.Readable.from([fileId9.toString()])
|
||||
+ await mockFilestore.addFile(projectIdDeleted0, fileId4, fileId4.toString())
|
||||
+ await mockFilestore.addFile(projectIdDeleted1, fileId5, fileId5.toString())
|
||||
+ await mockFilestore.addFile(
|
||||
+ projectIdBadFileTree3,
|
||||
+ fileId9,
|
||||
+ fileId9.toString()
|
||||
)
|
||||
}
|
||||
|
||||
async function prepareEnvironment() {
|
||||
await cleanup.everything()
|
||||
+ await mockFilestore.start()
|
||||
await populateMongo()
|
||||
await populateHistoryV1()
|
||||
await populateFilestore()
|
||||
@@ -1117,10 +1133,7 @@ describe('back_fill_file_hash script', function () {
|
||||
beforeEach('prepare environment', prepareEnvironment)
|
||||
|
||||
it('should gracefully handle fatal errors', async function () {
|
||||
- await FILESTORE_PERSISTOR.deleteObject(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId0}/${fileId0}`
|
||||
- )
|
||||
+ mockFilestore.deleteObject(projectId0, fileId0)
|
||||
const t0 = Date.now()
|
||||
const { stats, result } = await tryRunScript([], {
|
||||
RETRIES: '10',
|
||||
@@ -1148,17 +1161,10 @@ describe('back_fill_file_hash script', function () {
|
||||
})
|
||||
|
||||
it('should retry on error', async function () {
|
||||
- await FILESTORE_PERSISTOR.deleteObject(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId0}/${fileId0}`
|
||||
- )
|
||||
+ mockFilestore.deleteObject(projectId0, fileId0)
|
||||
const restoreFileAfter5s = async () => {
|
||||
await setTimeout(5_000)
|
||||
- await FILESTORE_PERSISTOR.sendStream(
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- `${projectId0}/${fileId0}`,
|
||||
- Stream.Readable.from([fileId0.toString()])
|
||||
- )
|
||||
+ mockFilestore.addFile(projectId0, fileId0, fileId0.toString())
|
||||
}
|
||||
// use Promise.allSettled to ensure the above sendStream call finishes before this test completes
|
||||
const [
|
||||
|
||||
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index feb4612ddc23..5a590e347a94 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -178,7 +178,7 @@ const STREAM_HIGH_WATER_MARK = parseInt(
|
||||
const LOGGING_INTERVAL = parseInt(process.env.LOGGING_INTERVAL || '60000', 10)
|
||||
const SLEEP_BEFORE_EXIT = parseInt(process.env.SLEEP_BEFORE_EXIT || '1000', 10)
|
||||
|
||||
-// Filestore endpoint location, the port is always hardcoded
|
||||
+// Filestore endpoint location
|
||||
const FILESTORE_HOST = process.env.FILESTORE_HOST || '127.0.0.1'
|
||||
const FILESTORE_PORT = process.env.FILESTORE_PORT || '3009'
|
||||
|
||||
|
||||
|
||||
|
||||
diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
index 4e697b8bec2c..8f861d393451 100644
|
||||
--- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
+++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
@@ -27,7 +27,6 @@ import {
|
||||
} from '../../../../storage/lib/blob_store/index.js'
|
||||
|
||||
import express from 'express'
|
||||
-import bodyParser from 'body-parser'
|
||||
|
||||
chai.use(chaiExclude)
|
||||
const TIMEOUT = 20 * 1_000
|
||||
@@ -46,8 +45,6 @@ class MockFilestore {
|
||||
this.files = {}
|
||||
|
||||
this.app = express()
|
||||
- this.app.use(bodyParser.json())
|
||||
- this.app.use(bodyParser.urlencoded({ extended: true }))
|
||||
|
||||
this.app.get('/project/:projectId/file/:fileId', (req, res) => {
|
||||
const { projectId, fileId } = req.params
|
||||
|
||||
961
server-ce/hotfix/5.5.3/pr_27173.patch
Normal file
961
server-ce/hotfix/5.5.3/pr_27173.patch
Normal file
@@ -0,0 +1,961 @@
|
||||
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index 5a590e347a9..3be1c8a5407 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -1,28 +1,20 @@
|
||||
// @ts-check
|
||||
-import Crypto from 'node:crypto'
|
||||
import Events from 'node:events'
|
||||
import fs from 'node:fs'
|
||||
import Path from 'node:path'
|
||||
import { performance } from 'node:perf_hooks'
|
||||
import Stream from 'node:stream'
|
||||
-import zLib from 'node:zlib'
|
||||
import { setTimeout } from 'node:timers/promises'
|
||||
-import { Binary, ObjectId } from 'mongodb'
|
||||
+import { ObjectId } from 'mongodb'
|
||||
import pLimit from 'p-limit'
|
||||
import logger from '@overleaf/logger'
|
||||
import {
|
||||
batchedUpdate,
|
||||
objectIdFromInput,
|
||||
renderObjectId,
|
||||
- READ_PREFERENCE_SECONDARY,
|
||||
} from '@overleaf/mongo-utils/batchedUpdate.js'
|
||||
import OError from '@overleaf/o-error'
|
||||
-import {
|
||||
- AlreadyWrittenError,
|
||||
- NoKEKMatchedError,
|
||||
- NotFoundError,
|
||||
-} from '@overleaf/object-persistor/src/Errors.js'
|
||||
-import { backupPersistor, projectBlobsBucket } from '../lib/backupPersistor.mjs'
|
||||
+import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
|
||||
import {
|
||||
BlobStore,
|
||||
GLOBAL_BLOBS,
|
||||
@@ -30,9 +22,8 @@ import {
|
||||
getProjectBlobsBatch,
|
||||
getStringLengthOfFile,
|
||||
makeBlobForFile,
|
||||
- makeProjectKey,
|
||||
} from '../lib/blob_store/index.js'
|
||||
-import { backedUpBlobs as backedUpBlobsCollection, db } from '../lib/mongodb.js'
|
||||
+import { db } from '../lib/mongodb.js'
|
||||
import commandLineArgs from 'command-line-args'
|
||||
import readline from 'node:readline'
|
||||
|
||||
@@ -88,7 +79,7 @@ ObjectId.cacheHexString = true
|
||||
*/
|
||||
|
||||
/**
|
||||
- * @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, PROCESS_BLOBS: boolean, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean, COLLECT_BACKED_UP_BLOBS: boolean}}
|
||||
+ * @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, PROCESS_BLOBS: boolean, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean}}
|
||||
*/
|
||||
function parseArgs() {
|
||||
const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
|
||||
@@ -98,7 +89,6 @@ function parseArgs() {
|
||||
{ name: 'processHashedFiles', type: String, defaultValue: 'false' },
|
||||
{ name: 'processBlobs', type: String, defaultValue: 'true' },
|
||||
{ name: 'projectIdsFrom', type: String, defaultValue: '' },
|
||||
- { name: 'collectBackedUpBlobs', type: String, defaultValue: 'true' },
|
||||
{
|
||||
name: 'BATCH_RANGE_START',
|
||||
type: String,
|
||||
@@ -130,7 +120,6 @@ function parseArgs() {
|
||||
PROCESS_DELETED_PROJECTS: boolVal('processDeletedProjects'),
|
||||
PROCESS_BLOBS: boolVal('processBlobs'),
|
||||
PROCESS_HASHED_FILES: boolVal('processHashedFiles'),
|
||||
- COLLECT_BACKED_UP_BLOBS: boolVal('collectBackedUpBlobs'),
|
||||
BATCH_RANGE_START,
|
||||
BATCH_RANGE_END,
|
||||
LOGGING_IDENTIFIER: args['LOGGING_IDENTIFIER'] || BATCH_RANGE_START,
|
||||
@@ -143,7 +132,6 @@ const {
|
||||
PROCESS_DELETED_PROJECTS,
|
||||
PROCESS_BLOBS,
|
||||
PROCESS_HASHED_FILES,
|
||||
- COLLECT_BACKED_UP_BLOBS,
|
||||
BATCH_RANGE_START,
|
||||
BATCH_RANGE_END,
|
||||
LOGGING_IDENTIFIER,
|
||||
@@ -232,7 +220,6 @@ async function processConcurrently(array, fn) {
|
||||
const STATS = {
|
||||
projects: 0,
|
||||
blobs: 0,
|
||||
- backedUpBlobs: 0,
|
||||
filesWithHash: 0,
|
||||
filesWithoutHash: 0,
|
||||
filesDuplicated: 0,
|
||||
@@ -246,14 +233,8 @@ const STATS = {
|
||||
projectHardDeleted: 0,
|
||||
fileHardDeleted: 0,
|
||||
mongoUpdates: 0,
|
||||
- deduplicatedWriteToAWSLocalCount: 0,
|
||||
- deduplicatedWriteToAWSLocalEgress: 0,
|
||||
- deduplicatedWriteToAWSRemoteCount: 0,
|
||||
- deduplicatedWriteToAWSRemoteEgress: 0,
|
||||
readFromGCSCount: 0,
|
||||
readFromGCSIngress: 0,
|
||||
- writeToAWSCount: 0,
|
||||
- writeToAWSEgress: 0,
|
||||
writeToGCSCount: 0,
|
||||
writeToGCSEgress: 0,
|
||||
}
|
||||
@@ -275,7 +256,7 @@ function toMiBPerSecond(v, ms) {
|
||||
/**
|
||||
* @param {any} stats
|
||||
* @param {number} ms
|
||||
- * @return {{writeToAWSThroughputMiBPerSecond: number, readFromGCSThroughputMiBPerSecond: number}}
|
||||
+ * @return {{readFromGCSThroughputMiBPerSecond: number}}
|
||||
*/
|
||||
function bandwidthStats(stats, ms) {
|
||||
return {
|
||||
@@ -283,10 +264,6 @@ function bandwidthStats(stats, ms) {
|
||||
stats.readFromGCSIngress,
|
||||
ms
|
||||
),
|
||||
- writeToAWSThroughputMiBPerSecond: toMiBPerSecond(
|
||||
- stats.writeToAWSEgress,
|
||||
- ms
|
||||
- ),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -382,9 +359,6 @@ async function processFile(entry, filePath) {
|
||||
throw err // disable retries for not found in filestore bucket case
|
||||
}
|
||||
}
|
||||
- if (err instanceof NoKEKMatchedError) {
|
||||
- throw err // disable retries when upload to S3 will fail again
|
||||
- }
|
||||
STATS.filesRetries++
|
||||
const {
|
||||
ctx: { projectId },
|
||||
@@ -417,32 +391,8 @@ async function processFileOnce(entry, filePath) {
|
||||
if (entry.blob) {
|
||||
const { blob } = entry
|
||||
const hash = blob.getHash()
|
||||
- if (entry.ctx.hasBackedUpBlob(hash)) {
|
||||
- STATS.deduplicatedWriteToAWSLocalCount++
|
||||
- STATS.deduplicatedWriteToAWSLocalEgress += estimateBlobSize(blob)
|
||||
- return hash
|
||||
- }
|
||||
- entry.ctx.recordPendingBlob(hash)
|
||||
- STATS.readFromGCSCount++
|
||||
- const src = await blobStore.getStream(hash)
|
||||
- const dst = fs.createWriteStream(filePath, {
|
||||
- highWaterMark: STREAM_HIGH_WATER_MARK,
|
||||
- })
|
||||
- try {
|
||||
- await Stream.promises.pipeline(src, dst)
|
||||
- } finally {
|
||||
- STATS.readFromGCSIngress += dst.bytesWritten
|
||||
- }
|
||||
- await uploadBlobToAWS(entry, blob, filePath)
|
||||
return hash
|
||||
}
|
||||
- if (entry.hash && entry.ctx.hasBackedUpBlob(entry.hash)) {
|
||||
- STATS.deduplicatedWriteToAWSLocalCount++
|
||||
- const blob = entry.ctx.getCachedHistoryBlob(entry.hash)
|
||||
- // blob might not exist on re-run with --PROCESS_BLOBS=false
|
||||
- if (blob) STATS.deduplicatedWriteToAWSLocalEgress += estimateBlobSize(blob)
|
||||
- return entry.hash
|
||||
- }
|
||||
|
||||
STATS.readFromGCSCount++
|
||||
// make a fetch request to filestore itself
|
||||
@@ -469,16 +419,14 @@ async function processFileOnce(entry, filePath) {
|
||||
STATS.globalBlobsEgress += estimateBlobSize(blob)
|
||||
return hash
|
||||
}
|
||||
- if (entry.ctx.hasBackedUpBlob(hash)) {
|
||||
- STATS.deduplicatedWriteToAWSLocalCount++
|
||||
- STATS.deduplicatedWriteToAWSLocalEgress += estimateBlobSize(blob)
|
||||
+ if (entry.ctx.hasCompletedBlob(hash)) {
|
||||
return hash
|
||||
}
|
||||
entry.ctx.recordPendingBlob(hash)
|
||||
|
||||
try {
|
||||
await uploadBlobToGCS(blobStore, entry, blob, hash, filePath)
|
||||
- await uploadBlobToAWS(entry, blob, filePath)
|
||||
+ entry.ctx.recordCompletedBlob(hash) // mark upload as completed
|
||||
} catch (err) {
|
||||
entry.ctx.recordFailedBlob(hash)
|
||||
throw err
|
||||
@@ -515,76 +463,6 @@ async function uploadBlobToGCS(blobStore, entry, blob, hash, filePath) {
|
||||
|
||||
const GZ_SUFFIX = '.gz'
|
||||
|
||||
-/**
|
||||
- * @param {QueueEntry} entry
|
||||
- * @param {Blob} blob
|
||||
- * @param {string} filePath
|
||||
- * @return {Promise<void>}
|
||||
- */
|
||||
-async function uploadBlobToAWS(entry, blob, filePath) {
|
||||
- const { historyId } = entry.ctx
|
||||
- let backupSource
|
||||
- let contentEncoding
|
||||
- const md5 = Crypto.createHash('md5')
|
||||
- let size
|
||||
- if (blob.getStringLength()) {
|
||||
- const filePathCompressed = filePath + GZ_SUFFIX
|
||||
- backupSource = filePathCompressed
|
||||
- contentEncoding = 'gzip'
|
||||
- size = 0
|
||||
- await Stream.promises.pipeline(
|
||||
- fs.createReadStream(filePath, { highWaterMark: STREAM_HIGH_WATER_MARK }),
|
||||
- zLib.createGzip(),
|
||||
- async function* (source) {
|
||||
- for await (const chunk of source) {
|
||||
- size += chunk.byteLength
|
||||
- md5.update(chunk)
|
||||
- yield chunk
|
||||
- }
|
||||
- },
|
||||
- fs.createWriteStream(filePathCompressed, {
|
||||
- highWaterMark: STREAM_HIGH_WATER_MARK,
|
||||
- })
|
||||
- )
|
||||
- } else {
|
||||
- backupSource = filePath
|
||||
- size = blob.getByteLength()
|
||||
- await Stream.promises.pipeline(
|
||||
- fs.createReadStream(filePath, { highWaterMark: STREAM_HIGH_WATER_MARK }),
|
||||
- md5
|
||||
- )
|
||||
- }
|
||||
- const backendKeyPath = makeProjectKey(historyId, blob.getHash())
|
||||
- const persistor = await entry.ctx.getCachedPersistor(backendKeyPath)
|
||||
- try {
|
||||
- STATS.writeToAWSCount++
|
||||
- await persistor.sendStream(
|
||||
- projectBlobsBucket,
|
||||
- backendKeyPath,
|
||||
- fs.createReadStream(backupSource, {
|
||||
- highWaterMark: STREAM_HIGH_WATER_MARK,
|
||||
- }),
|
||||
- {
|
||||
- contentEncoding,
|
||||
- contentType: 'application/octet-stream',
|
||||
- contentLength: size,
|
||||
- sourceMd5: md5.digest('hex'),
|
||||
- ifNoneMatch: '*', // de-duplicate write (we pay for the request, but avoid egress)
|
||||
- }
|
||||
- )
|
||||
- STATS.writeToAWSEgress += size
|
||||
- } catch (err) {
|
||||
- if (err instanceof AlreadyWrittenError) {
|
||||
- STATS.deduplicatedWriteToAWSRemoteCount++
|
||||
- STATS.deduplicatedWriteToAWSRemoteEgress += size
|
||||
- } else {
|
||||
- STATS.writeToAWSEgress += size
|
||||
- throw err
|
||||
- }
|
||||
- }
|
||||
- entry.ctx.recordBackedUpBlob(blob.getHash())
|
||||
-}
|
||||
-
|
||||
/**
|
||||
* @param {Array<QueueEntry>} files
|
||||
* @return {Promise<void>}
|
||||
@@ -670,23 +548,18 @@ async function queueNextBatch(batch, prefix = 'rootFolder.0') {
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
async function processBatch(batch, prefix = 'rootFolder.0') {
|
||||
- const [{ nBlobs, blobs }, { nBackedUpBlobs, backedUpBlobs }] =
|
||||
- await Promise.all([collectProjectBlobs(batch), collectBackedUpBlobs(batch)])
|
||||
- const files = Array.from(findFileInBatch(batch, prefix, blobs, backedUpBlobs))
|
||||
+ const { nBlobs, blobs } = await collectProjectBlobs(batch)
|
||||
+ const files = Array.from(findFileInBatch(batch, prefix, blobs))
|
||||
STATS.projects += batch.length
|
||||
STATS.blobs += nBlobs
|
||||
- STATS.backedUpBlobs += nBackedUpBlobs
|
||||
|
||||
// GC
|
||||
batch.length = 0
|
||||
blobs.clear()
|
||||
- backedUpBlobs.clear()
|
||||
|
||||
// The files are currently ordered by project-id.
|
||||
// Order them by file-id ASC then blobs ASC to
|
||||
// - process files before blobs
|
||||
- // - avoid head-of-line blocking from many project-files waiting on the generation of the projects DEK (round trip to AWS)
|
||||
- // - bonus: increase chance of de-duplicating write to AWS
|
||||
files.sort(
|
||||
/**
|
||||
* @param {QueueEntry} a
|
||||
@@ -903,23 +776,15 @@ function* findFiles(ctx, folder, path, isInputLoop = false) {
|
||||
* @param {Array<Project>} projects
|
||||
* @param {string} prefix
|
||||
* @param {Map<string,Array<Blob>>} blobs
|
||||
- * @param {Map<string,Array<string>>} backedUpBlobs
|
||||
* @return Generator<QueueEntry>
|
||||
*/
|
||||
-function* findFileInBatch(projects, prefix, blobs, backedUpBlobs) {
|
||||
+function* findFileInBatch(projects, prefix, blobs) {
|
||||
for (const project of projects) {
|
||||
const projectIdS = project._id.toString()
|
||||
const historyIdS = project.overleaf.history.id.toString()
|
||||
const projectBlobs = blobs.get(historyIdS) || []
|
||||
- const projectBackedUpBlobs = new Set(backedUpBlobs.get(projectIdS) || [])
|
||||
- const ctx = new ProjectContext(
|
||||
- project._id,
|
||||
- historyIdS,
|
||||
- projectBlobs,
|
||||
- projectBackedUpBlobs
|
||||
- )
|
||||
+ const ctx = new ProjectContext(project._id, historyIdS, projectBlobs)
|
||||
for (const blob of projectBlobs) {
|
||||
- if (projectBackedUpBlobs.has(blob.getHash())) continue
|
||||
ctx.remainingQueueEntries++
|
||||
yield {
|
||||
ctx,
|
||||
@@ -951,42 +816,11 @@ async function collectProjectBlobs(batch) {
|
||||
return await getProjectBlobsBatch(batch.map(p => p.overleaf.history.id))
|
||||
}
|
||||
|
||||
-/**
|
||||
- * @param {Array<Project>} projects
|
||||
- * @return {Promise<{nBackedUpBlobs:number,backedUpBlobs:Map<string,Array<string>>}>}
|
||||
- */
|
||||
-async function collectBackedUpBlobs(projects) {
|
||||
- let nBackedUpBlobs = 0
|
||||
- const backedUpBlobs = new Map()
|
||||
- if (!COLLECT_BACKED_UP_BLOBS) return { nBackedUpBlobs, backedUpBlobs }
|
||||
-
|
||||
- const cursor = backedUpBlobsCollection.find(
|
||||
- { _id: { $in: projects.map(p => p._id) } },
|
||||
- {
|
||||
- readPreference: READ_PREFERENCE_SECONDARY,
|
||||
- sort: { _id: 1 },
|
||||
- }
|
||||
- )
|
||||
- for await (const record of cursor) {
|
||||
- const blobs = record.blobs.map(b => b.toString('hex'))
|
||||
- backedUpBlobs.set(record._id.toString(), blobs)
|
||||
- nBackedUpBlobs += blobs.length
|
||||
- }
|
||||
- return { nBackedUpBlobs, backedUpBlobs }
|
||||
-}
|
||||
-
|
||||
-const BATCH_HASH_WRITES = 1_000
|
||||
const BATCH_FILE_UPDATES = 100
|
||||
|
||||
const MONGO_PATH_SKIP_WRITE_HASH_TO_FILE_TREE = 'skip-write-to-file-tree'
|
||||
|
||||
class ProjectContext {
|
||||
- /** @type {Promise<CachedPerProjectEncryptedS3Persistor> | null} */
|
||||
- #cachedPersistorPromise = null
|
||||
-
|
||||
- /** @type {Set<string>} */
|
||||
- #backedUpBlobs
|
||||
-
|
||||
/** @type {Map<string, Blob>} */
|
||||
#historyBlobs
|
||||
|
||||
@@ -1000,12 +834,10 @@ class ProjectContext {
|
||||
* @param {ObjectId} projectId
|
||||
* @param {string} historyId
|
||||
* @param {Array<Blob>} blobs
|
||||
- * @param {Set<string>} backedUpBlobs
|
||||
*/
|
||||
- constructor(projectId, historyId, blobs, backedUpBlobs) {
|
||||
+ constructor(projectId, historyId, blobs) {
|
||||
this.projectId = projectId
|
||||
this.historyId = historyId
|
||||
- this.#backedUpBlobs = backedUpBlobs
|
||||
this.#historyBlobs = new Map(blobs.map(b => [b.getHash(), b]))
|
||||
}
|
||||
|
||||
@@ -1034,75 +866,17 @@ class ProjectContext {
|
||||
return false
|
||||
}
|
||||
|
||||
- /**
|
||||
- * @param {string} key
|
||||
- * @return {Promise<CachedPerProjectEncryptedS3Persistor>}
|
||||
- */
|
||||
- getCachedPersistor(key) {
|
||||
- if (!this.#cachedPersistorPromise) {
|
||||
- // Fetch DEK once, but only if needed -- upon the first use
|
||||
- this.#cachedPersistorPromise = this.#getCachedPersistorWithRetries(key)
|
||||
- }
|
||||
- return this.#cachedPersistorPromise
|
||||
- }
|
||||
-
|
||||
- /**
|
||||
- * @param {string} key
|
||||
- * @return {Promise<CachedPerProjectEncryptedS3Persistor>}
|
||||
- */
|
||||
- async #getCachedPersistorWithRetries(key) {
|
||||
- // Optimization: Skip GET on DEK in case no blobs are marked as backed up yet.
|
||||
- let tryGenerateDEKFirst = this.#backedUpBlobs.size === 0
|
||||
- for (let attempt = 0; attempt < RETRIES; attempt++) {
|
||||
- try {
|
||||
- if (tryGenerateDEKFirst) {
|
||||
- try {
|
||||
- return await backupPersistor.generateDataEncryptionKey(
|
||||
- projectBlobsBucket,
|
||||
- key
|
||||
- )
|
||||
- } catch (err) {
|
||||
- if (err instanceof AlreadyWrittenError) {
|
||||
- tryGenerateDEKFirst = false
|
||||
- // fall back to GET below
|
||||
- } else {
|
||||
- throw err
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
- return await backupPersistor.forProject(projectBlobsBucket, key)
|
||||
- } catch (err) {
|
||||
- if (gracefulShutdownInitiated) throw err
|
||||
- if (err instanceof NoKEKMatchedError) {
|
||||
- throw err
|
||||
- } else {
|
||||
- logger.warn(
|
||||
- { err, projectId: this.projectId, attempt },
|
||||
- 'failed to get DEK, trying again'
|
||||
- )
|
||||
- const jitter = Math.random() * RETRY_DELAY_MS
|
||||
- await setTimeout(RETRY_DELAY_MS + jitter)
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
- return await backupPersistor.forProject(projectBlobsBucket, key)
|
||||
- }
|
||||
-
|
||||
async flushMongoQueuesIfNeeded() {
|
||||
if (this.remainingQueueEntries === 0) {
|
||||
await this.flushMongoQueues()
|
||||
}
|
||||
|
||||
- if (this.#completedBlobs.size > BATCH_HASH_WRITES) {
|
||||
- await this.#storeBackedUpBlobs()
|
||||
- }
|
||||
if (this.#pendingFileWrites.length > BATCH_FILE_UPDATES) {
|
||||
await this.#storeFileHashes()
|
||||
}
|
||||
}
|
||||
|
||||
async flushMongoQueues() {
|
||||
- await this.#storeBackedUpBlobs()
|
||||
await this.#storeFileHashes()
|
||||
}
|
||||
|
||||
@@ -1111,20 +885,6 @@ class ProjectContext {
|
||||
/** @type {Set<string>} */
|
||||
#completedBlobs = new Set()
|
||||
|
||||
- async #storeBackedUpBlobs() {
|
||||
- if (this.#completedBlobs.size === 0) return
|
||||
- const blobs = Array.from(this.#completedBlobs).map(
|
||||
- hash => new Binary(Buffer.from(hash, 'hex'))
|
||||
- )
|
||||
- this.#completedBlobs.clear()
|
||||
- STATS.mongoUpdates++
|
||||
- await backedUpBlobsCollection.updateOne(
|
||||
- { _id: this.projectId },
|
||||
- { $addToSet: { blobs: { $each: blobs } } },
|
||||
- { upsert: true }
|
||||
- )
|
||||
- }
|
||||
-
|
||||
/**
|
||||
* @param {string} hash
|
||||
*/
|
||||
@@ -1142,8 +902,7 @@ class ProjectContext {
|
||||
/**
|
||||
* @param {string} hash
|
||||
*/
|
||||
- recordBackedUpBlob(hash) {
|
||||
- this.#backedUpBlobs.add(hash)
|
||||
+ recordCompletedBlob(hash) {
|
||||
this.#completedBlobs.add(hash)
|
||||
this.#pendingBlobs.delete(hash)
|
||||
}
|
||||
@@ -1152,12 +911,8 @@ class ProjectContext {
|
||||
* @param {string} hash
|
||||
* @return {boolean}
|
||||
*/
|
||||
- hasBackedUpBlob(hash) {
|
||||
- return (
|
||||
- this.#pendingBlobs.has(hash) ||
|
||||
- this.#completedBlobs.has(hash) ||
|
||||
- this.#backedUpBlobs.has(hash)
|
||||
- )
|
||||
+ hasCompletedBlob(hash) {
|
||||
+ return this.#pendingBlobs.has(hash) || this.#completedBlobs.has(hash)
|
||||
}
|
||||
|
||||
/** @type {Array<QueueEntry>} */
|
||||
diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
index 8f861d39345..62b0b1de25f 100644
|
||||
--- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
+++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
@@ -4,23 +4,17 @@ import Stream from 'node:stream'
|
||||
import { setTimeout } from 'node:timers/promises'
|
||||
import { promisify } from 'node:util'
|
||||
import { ObjectId, Binary } from 'mongodb'
|
||||
-import {
|
||||
- db,
|
||||
- backedUpBlobs,
|
||||
- globalBlobs,
|
||||
-} from '../../../../storage/lib/mongodb.js'
|
||||
+import { db, globalBlobs } from '../../../../storage/lib/mongodb.js'
|
||||
import cleanup from './support/cleanup.js'
|
||||
import testProjects from '../api/support/test_projects.js'
|
||||
import { execFile } from 'node:child_process'
|
||||
import chai, { expect } from 'chai'
|
||||
import chaiExclude from 'chai-exclude'
|
||||
-import config from 'config'
|
||||
import { WritableBuffer } from '@overleaf/stream-utils'
|
||||
import {
|
||||
backupPersistor,
|
||||
projectBlobsBucket,
|
||||
} from '../../../../storage/lib/backupPersistor.mjs'
|
||||
-import projectKey from '../../../../storage/lib/project_key.js'
|
||||
import {
|
||||
BlobStore,
|
||||
makeProjectKey,
|
||||
@@ -31,9 +25,6 @@ import express from 'express'
|
||||
chai.use(chaiExclude)
|
||||
const TIMEOUT = 20 * 1_000
|
||||
|
||||
-const { deksBucket } = config.get('backupStore')
|
||||
-const { tieringStorageClass } = config.get('backupPersistor')
|
||||
-
|
||||
const projectsCollection = db.collection('projects')
|
||||
const deletedProjectsCollection = db.collection('deletedProjects')
|
||||
|
||||
@@ -117,17 +108,6 @@ function binaryForGitBlobHash(gitBlobHash) {
|
||||
return new Binary(Buffer.from(gitBlobHash, 'hex'))
|
||||
}
|
||||
|
||||
-async function listS3Bucket(bucket, wantStorageClass) {
|
||||
- const client = backupPersistor._getClientForBucket(bucket)
|
||||
- const response = await client.listObjectsV2({ Bucket: bucket }).promise()
|
||||
-
|
||||
- for (const object of response.Contents || []) {
|
||||
- expect(object).to.have.property('StorageClass', wantStorageClass)
|
||||
- }
|
||||
-
|
||||
- return (response.Contents || []).map(item => item.Key || '')
|
||||
-}
|
||||
-
|
||||
function objectIdFromTime(timestamp) {
|
||||
return ObjectId.createFromTime(new Date(timestamp).getTime() / 1000)
|
||||
}
|
||||
@@ -591,11 +571,7 @@ describe('back_fill_file_hash script', function () {
|
||||
expect((await fs.promises.readdir('/tmp')).join(';')).to.not.match(
|
||||
/back_fill_file_hash/
|
||||
)
|
||||
- const extraStatsKeys = [
|
||||
- 'eventLoop',
|
||||
- 'readFromGCSThroughputMiBPerSecond',
|
||||
- 'writeToAWSThroughputMiBPerSecond',
|
||||
- ]
|
||||
+ const extraStatsKeys = ['eventLoop', 'readFromGCSThroughputMiBPerSecond']
|
||||
const stats = JSON.parse(
|
||||
result.stderr
|
||||
.split('\n')
|
||||
@@ -610,7 +586,6 @@ describe('back_fill_file_hash script', function () {
|
||||
delete stats.time
|
||||
if (shouldHaveWritten) {
|
||||
expect(stats.readFromGCSThroughputMiBPerSecond).to.be.greaterThan(0)
|
||||
- expect(stats.writeToAWSThroughputMiBPerSecond).to.be.greaterThan(0)
|
||||
}
|
||||
for (const key of extraStatsKeys) {
|
||||
delete stats[key]
|
||||
@@ -856,109 +831,6 @@ describe('back_fill_file_hash script', function () {
|
||||
},
|
||||
},
|
||||
])
|
||||
- expect(
|
||||
- (await backedUpBlobs.find({}, { sort: { _id: 1 } }).toArray()).map(
|
||||
- entry => {
|
||||
- // blobs are pushed unordered into mongo. Sort the list for consistency.
|
||||
- entry.blobs.sort()
|
||||
- return entry
|
||||
- }
|
||||
- )
|
||||
- ).to.deep.equal([
|
||||
- {
|
||||
- _id: projectId0,
|
||||
- blobs: [
|
||||
- binaryForGitBlobHash(gitBlobHash(fileId0)),
|
||||
- binaryForGitBlobHash(hashFile7),
|
||||
- binaryForGitBlobHash(hashTextBlob0),
|
||||
- ].sort(),
|
||||
- },
|
||||
- {
|
||||
- _id: projectId1,
|
||||
- blobs: [
|
||||
- binaryForGitBlobHash(gitBlobHash(fileId1)),
|
||||
- binaryForGitBlobHash(hashTextBlob1),
|
||||
- ].sort(),
|
||||
- },
|
||||
- {
|
||||
- _id: projectId2,
|
||||
- blobs: [binaryForGitBlobHash(hashTextBlob2)]
|
||||
- .concat(
|
||||
- processHashedFiles
|
||||
- ? [binaryForGitBlobHash(gitBlobHash(fileId2))]
|
||||
- : []
|
||||
- )
|
||||
- .sort(),
|
||||
- },
|
||||
- {
|
||||
- _id: projectIdDeleted0,
|
||||
- blobs: [binaryForGitBlobHash(gitBlobHash(fileId4))].sort(),
|
||||
- },
|
||||
- {
|
||||
- _id: projectId3,
|
||||
- blobs: [binaryForGitBlobHash(gitBlobHash(fileId3))].sort(),
|
||||
- },
|
||||
- ...(processHashedFiles
|
||||
- ? [
|
||||
- {
|
||||
- _id: projectIdDeleted1,
|
||||
- blobs: [binaryForGitBlobHash(gitBlobHash(fileId5))].sort(),
|
||||
- },
|
||||
- ]
|
||||
- : []),
|
||||
- {
|
||||
- _id: projectIdBadFileTree0,
|
||||
- blobs: [binaryForGitBlobHash(hashTextBlob3)].sort(),
|
||||
- },
|
||||
- {
|
||||
- _id: projectIdBadFileTree3,
|
||||
- blobs: [binaryForGitBlobHash(gitBlobHash(fileId9))].sort(),
|
||||
- },
|
||||
- ])
|
||||
- })
|
||||
- it('should have backed up all the files', async function () {
|
||||
- expect(tieringStorageClass).to.exist
|
||||
- const blobs = await listS3Bucket(projectBlobsBucket, tieringStorageClass)
|
||||
- expect(blobs.sort()).to.deep.equal(
|
||||
- Array.from(
|
||||
- new Set(
|
||||
- writtenBlobs
|
||||
- .map(({ historyId, fileId, hash }) =>
|
||||
- makeProjectKey(historyId, hash || gitBlobHash(fileId))
|
||||
- )
|
||||
- .sort()
|
||||
- )
|
||||
- )
|
||||
- )
|
||||
- for (let { historyId, fileId, hash, content } of writtenBlobs) {
|
||||
- hash = hash || gitBlobHash(fileId.toString())
|
||||
- const s = await backupPersistor.getObjectStream(
|
||||
- projectBlobsBucket,
|
||||
- makeProjectKey(historyId, hash),
|
||||
- { autoGunzip: true }
|
||||
- )
|
||||
- const buf = new WritableBuffer()
|
||||
- await Stream.promises.pipeline(s, buf)
|
||||
- expect(gitBlobHashBuffer(buf.getContents())).to.equal(hash)
|
||||
- if (content) {
|
||||
- expect(buf.getContents()).to.deep.equal(content)
|
||||
- } else {
|
||||
- const id = buf.getContents().toString('utf-8')
|
||||
- expect(id).to.equal(fileId.toString())
|
||||
- // double check we are not comparing 'undefined' or '[object Object]' above
|
||||
- expect(id).to.match(/^[a-f0-9]{24}$/)
|
||||
- }
|
||||
- }
|
||||
- const deks = await listS3Bucket(deksBucket, 'STANDARD')
|
||||
- expect(deks.sort()).to.deep.equal(
|
||||
- Array.from(
|
||||
- new Set(
|
||||
- writtenBlobs.map(
|
||||
- ({ historyId }) => projectKey.format(historyId) + '/dek'
|
||||
- )
|
||||
- )
|
||||
- ).sort()
|
||||
- )
|
||||
})
|
||||
it('should have written the back filled files to history v1', async function () {
|
||||
for (const { historyId, hash, fileId, content } of writtenBlobs) {
|
||||
@@ -991,14 +863,13 @@ describe('back_fill_file_hash script', function () {
|
||||
// We still need to iterate over all the projects and blobs.
|
||||
projects: 10,
|
||||
blobs: 10,
|
||||
- backedUpBlobs: 10,
|
||||
+
|
||||
badFileTrees: 4,
|
||||
}
|
||||
if (processHashedFiles) {
|
||||
stats = sumStats(stats, {
|
||||
...STATS_ALL_ZERO,
|
||||
blobs: 2,
|
||||
- backedUpBlobs: 2,
|
||||
})
|
||||
}
|
||||
expect(rerun.stats).deep.equal(stats)
|
||||
@@ -1024,7 +895,6 @@ describe('back_fill_file_hash script', function () {
|
||||
const STATS_ALL_ZERO = {
|
||||
projects: 0,
|
||||
blobs: 0,
|
||||
- backedUpBlobs: 0,
|
||||
filesWithHash: 0,
|
||||
filesWithoutHash: 0,
|
||||
filesDuplicated: 0,
|
||||
@@ -1038,21 +908,14 @@ describe('back_fill_file_hash script', function () {
|
||||
fileHardDeleted: 0,
|
||||
badFileTrees: 0,
|
||||
mongoUpdates: 0,
|
||||
- deduplicatedWriteToAWSLocalCount: 0,
|
||||
- deduplicatedWriteToAWSLocalEgress: 0,
|
||||
- deduplicatedWriteToAWSRemoteCount: 0,
|
||||
- deduplicatedWriteToAWSRemoteEgress: 0,
|
||||
readFromGCSCount: 0,
|
||||
readFromGCSIngress: 0,
|
||||
- writeToAWSCount: 0,
|
||||
- writeToAWSEgress: 0,
|
||||
writeToGCSCount: 0,
|
||||
writeToGCSEgress: 0,
|
||||
}
|
||||
const STATS_UP_TO_PROJECT1 = {
|
||||
projects: 2,
|
||||
blobs: 2,
|
||||
- backedUpBlobs: 0,
|
||||
filesWithHash: 0,
|
||||
filesWithoutHash: 5,
|
||||
filesDuplicated: 1,
|
||||
@@ -1065,22 +928,15 @@ describe('back_fill_file_hash script', function () {
|
||||
projectHardDeleted: 0,
|
||||
fileHardDeleted: 0,
|
||||
badFileTrees: 0,
|
||||
- mongoUpdates: 4,
|
||||
- deduplicatedWriteToAWSLocalCount: 0,
|
||||
- deduplicatedWriteToAWSLocalEgress: 0,
|
||||
- deduplicatedWriteToAWSRemoteCount: 0,
|
||||
- deduplicatedWriteToAWSRemoteEgress: 0,
|
||||
- readFromGCSCount: 6,
|
||||
- readFromGCSIngress: 4000086,
|
||||
- writeToAWSCount: 5,
|
||||
- writeToAWSEgress: 4026,
|
||||
+ mongoUpdates: 2, // 4-2 blobs written to backedUpBlobs collection
|
||||
+ readFromGCSCount: 4,
|
||||
+ readFromGCSIngress: 4000072,
|
||||
writeToGCSCount: 3,
|
||||
writeToGCSEgress: 4000048,
|
||||
}
|
||||
const STATS_UP_FROM_PROJECT1_ONWARD = {
|
||||
projects: 8,
|
||||
blobs: 2,
|
||||
- backedUpBlobs: 0,
|
||||
filesWithHash: 0,
|
||||
filesWithoutHash: 4,
|
||||
filesDuplicated: 0,
|
||||
@@ -1093,26 +949,18 @@ describe('back_fill_file_hash script', function () {
|
||||
projectHardDeleted: 0,
|
||||
fileHardDeleted: 0,
|
||||
badFileTrees: 4,
|
||||
- mongoUpdates: 8,
|
||||
- deduplicatedWriteToAWSLocalCount: 1,
|
||||
- deduplicatedWriteToAWSLocalEgress: 30,
|
||||
- deduplicatedWriteToAWSRemoteCount: 0,
|
||||
- deduplicatedWriteToAWSRemoteEgress: 0,
|
||||
- readFromGCSCount: 6,
|
||||
- readFromGCSIngress: 110,
|
||||
- writeToAWSCount: 5,
|
||||
- writeToAWSEgress: 143,
|
||||
+ mongoUpdates: 3, // previously 5 blobs written to backedUpBlobs collection
|
||||
+ readFromGCSCount: 4,
|
||||
+ readFromGCSIngress: 96,
|
||||
writeToGCSCount: 3,
|
||||
writeToGCSEgress: 72,
|
||||
}
|
||||
const STATS_FILES_HASHED_EXTRA = {
|
||||
...STATS_ALL_ZERO,
|
||||
filesWithHash: 2,
|
||||
- mongoUpdates: 2,
|
||||
+ mongoUpdates: 0, // previously 2 blobs written to backedUpBlobs collection
|
||||
readFromGCSCount: 2,
|
||||
readFromGCSIngress: 48,
|
||||
- writeToAWSCount: 2,
|
||||
- writeToAWSEgress: 60,
|
||||
writeToGCSCount: 2,
|
||||
writeToGCSEgress: 48,
|
||||
}
|
||||
@@ -1144,8 +992,6 @@ describe('back_fill_file_hash script', function () {
|
||||
...STATS_ALL_ZERO,
|
||||
filesFailed: 1,
|
||||
readFromGCSIngress: -24,
|
||||
- writeToAWSCount: -1,
|
||||
- writeToAWSEgress: -28,
|
||||
writeToGCSCount: -1,
|
||||
writeToGCSEgress: -24,
|
||||
})
|
||||
@@ -1269,13 +1115,14 @@ describe('back_fill_file_hash script', function () {
|
||||
before('run script with hashed files', async function () {
|
||||
output2 = await runScript(['--processHashedFiles=true'], {})
|
||||
})
|
||||
- it('should print stats', function () {
|
||||
+ it('should print stats for the first run without hashed files', function () {
|
||||
expect(output1.stats).deep.equal(STATS_ALL)
|
||||
+ })
|
||||
+ it('should print stats for the hashed files run', function () {
|
||||
expect(output2.stats).deep.equal({
|
||||
...STATS_FILES_HASHED_EXTRA,
|
||||
projects: 10,
|
||||
blobs: 10,
|
||||
- backedUpBlobs: 10,
|
||||
badFileTrees: 4,
|
||||
})
|
||||
})
|
||||
@@ -1322,9 +1169,7 @@ describe('back_fill_file_hash script', function () {
|
||||
...STATS_FILES_HASHED_EXTRA,
|
||||
readFromGCSCount: 3,
|
||||
readFromGCSIngress: 72,
|
||||
- deduplicatedWriteToAWSLocalCount: 1,
|
||||
- deduplicatedWriteToAWSLocalEgress: 30,
|
||||
- mongoUpdates: 1,
|
||||
+ mongoUpdates: 0,
|
||||
filesWithHash: 3,
|
||||
})
|
||||
)
|
||||
@@ -1354,48 +1199,6 @@ describe('back_fill_file_hash script', function () {
|
||||
expect(output.stats).deep.equal(
|
||||
sumStats(STATS_ALL, {
|
||||
...STATS_ALL_ZERO,
|
||||
- // one remote deduplicate
|
||||
- deduplicatedWriteToAWSRemoteCount: 1,
|
||||
- deduplicatedWriteToAWSRemoteEgress: 28,
|
||||
- writeToAWSEgress: -28, // subtract skipped egress
|
||||
- })
|
||||
- )
|
||||
- })
|
||||
- commonAssertions()
|
||||
- })
|
||||
-
|
||||
- describe('with something in the bucket and marked as processed', function () {
|
||||
- before('prepare environment', prepareEnvironment)
|
||||
- before('create a file in s3', async function () {
|
||||
- await backupPersistor.sendStream(
|
||||
- projectBlobsBucket,
|
||||
- makeProjectKey(historyId0, hashTextBlob0),
|
||||
- Stream.Readable.from([contentTextBlob0]),
|
||||
- { contentLength: contentTextBlob0.byteLength }
|
||||
- )
|
||||
- await backedUpBlobs.insertMany([
|
||||
- {
|
||||
- _id: projectId0,
|
||||
- blobs: [binaryForGitBlobHash(hashTextBlob0)],
|
||||
- },
|
||||
- ])
|
||||
- })
|
||||
- let output
|
||||
- before('run script', async function () {
|
||||
- output = await runScript([], {
|
||||
- CONCURRENCY: '1',
|
||||
- })
|
||||
- })
|
||||
-
|
||||
- it('should print stats', function () {
|
||||
- expect(output.stats).deep.equal(
|
||||
- sumStats(STATS_ALL, {
|
||||
- ...STATS_ALL_ZERO,
|
||||
- backedUpBlobs: 1,
|
||||
- writeToAWSCount: -1,
|
||||
- writeToAWSEgress: -27,
|
||||
- readFromGCSCount: -1,
|
||||
- readFromGCSIngress: -7,
|
||||
})
|
||||
)
|
||||
})
|
||||
@@ -1418,8 +1221,10 @@ describe('back_fill_file_hash script', function () {
|
||||
})
|
||||
})
|
||||
|
||||
- it('should print stats', function () {
|
||||
+ it('should print stats for part 0', function () {
|
||||
expect(outputPart0.stats).to.deep.equal(STATS_UP_TO_PROJECT1)
|
||||
+ })
|
||||
+ it('should print stats for part 1', function () {
|
||||
expect(outputPart1.stats).to.deep.equal(STATS_UP_FROM_PROJECT1_ONWARD)
|
||||
})
|
||||
commonAssertions()
|
||||
|
||||
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index 3be1c8a5407..c9ed13c6cb4 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -388,12 +388,6 @@ async function processFileOnce(entry, filePath) {
|
||||
fileId,
|
||||
} = entry
|
||||
const blobStore = new BlobStore(historyId)
|
||||
- if (entry.blob) {
|
||||
- const { blob } = entry
|
||||
- const hash = blob.getHash()
|
||||
- return hash
|
||||
- }
|
||||
-
|
||||
STATS.readFromGCSCount++
|
||||
// make a fetch request to filestore itself
|
||||
const src = await fetchFromFilestore(projectId, fileId)
|
||||
@@ -784,16 +778,6 @@ function* findFileInBatch(projects, prefix, blobs) {
|
||||
const historyIdS = project.overleaf.history.id.toString()
|
||||
const projectBlobs = blobs.get(historyIdS) || []
|
||||
const ctx = new ProjectContext(project._id, historyIdS, projectBlobs)
|
||||
- for (const blob of projectBlobs) {
|
||||
- ctx.remainingQueueEntries++
|
||||
- yield {
|
||||
- ctx,
|
||||
- cacheKey: blob.getHash(),
|
||||
- path: MONGO_PATH_SKIP_WRITE_HASH_TO_FILE_TREE,
|
||||
- blob,
|
||||
- hash: blob.getHash(),
|
||||
- }
|
||||
- }
|
||||
try {
|
||||
yield* findFiles(ctx, project.rootFolder?.[0], prefix, true)
|
||||
} catch (err) {
|
||||
|
||||
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index c9ed13c6cb4..f24ce4a6605 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -387,6 +387,13 @@ async function processFileOnce(entry, filePath) {
|
||||
ctx: { projectId, historyId },
|
||||
fileId,
|
||||
} = entry
|
||||
+ if (entry.hash && entry.ctx.hasCompletedBlob(entry.hash)) {
|
||||
+ // We can enter this case for two identical files in the same project,
|
||||
+ // one with hash, the other without. When the one without hash gets
|
||||
+ // processed first, we can skip downloading the other one we already
|
||||
+ // know the hash of.
|
||||
+ return entry.hash
|
||||
+ }
|
||||
const blobStore = new BlobStore(historyId)
|
||||
STATS.readFromGCSCount++
|
||||
// make a fetch request to filestore itself
|
||||
|
||||
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index f24ce4a6605..0ccadaf5a95 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -559,8 +559,9 @@ async function processBatch(batch, prefix = 'rootFolder.0') {
|
||||
blobs.clear()
|
||||
|
||||
// The files are currently ordered by project-id.
|
||||
- // Order them by file-id ASC then blobs ASC to
|
||||
- // - process files before blobs
|
||||
+ // Order them by file-id ASC then hash ASC to
|
||||
+ // increase the hit rate on the "already processed
|
||||
+ // hash for project" checks.
|
||||
files.sort(
|
||||
/**
|
||||
* @param {QueueEntry} a
|
||||
|
||||
191
server-ce/hotfix/5.5.3/pr_27230.patch
Normal file
191
server-ce/hotfix/5.5.3/pr_27230.patch
Normal file
@@ -0,0 +1,191 @@
|
||||
|
||||
|
||||
diff --git a/services/web/app.mjs b/services/web/app.mjs
|
||||
index b7c723da3d77..3f54cc36a8c3 100644
|
||||
--- a/services/web/app.mjs
|
||||
+++ b/services/web/app.mjs
|
||||
@@ -56,14 +56,8 @@ if (Settings.catchErrors) {
|
||||
// Create ./data/dumpFolder if needed
|
||||
FileWriter.ensureDumpFolderExists()
|
||||
|
||||
-if (
|
||||
- !Features.hasFeature('project-history-blobs') &&
|
||||
- !Features.hasFeature('filestore')
|
||||
-) {
|
||||
- throw new Error(
|
||||
- 'invalid config: must enable either project-history-blobs (Settings.enableProjectHistoryBlobs=true) or enable filestore (Settings.disableFilestore=false)'
|
||||
- )
|
||||
-}
|
||||
+// Validate combination of feature flags.
|
||||
+Features.validateSettings()
|
||||
|
||||
// handle SIGTERM for graceful shutdown in kubernetes
|
||||
process.on('SIGTERM', function (signal) {
|
||||
diff --git a/services/web/app/src/Features/History/HistoryURLHelper.js b/services/web/app/src/Features/History/HistoryURLHelper.js
|
||||
index 8b8d8cbdd730..acb43ced68e0 100644
|
||||
--- a/services/web/app/src/Features/History/HistoryURLHelper.js
|
||||
+++ b/services/web/app/src/Features/History/HistoryURLHelper.js
|
||||
@@ -8,7 +8,7 @@ function projectHistoryURLWithFilestoreFallback(
|
||||
) {
|
||||
const filestoreURL = `${Settings.apis.filestore.url}/project/${projectId}/file/${fileRef._id}?from=${origin}`
|
||||
// TODO: When this file is converted to ES modules we will be able to use Features.hasFeature('project-history-blobs'). Currently we can't stub the feature return value in tests.
|
||||
- if (fileRef.hash && Settings.enableProjectHistoryBlobs) {
|
||||
+ if (fileRef.hash && Settings.filestoreMigrationLevel >= 1) {
|
||||
return {
|
||||
url: `${Settings.apis.project_history.url}/project/${historyId}/blob/${fileRef.hash}`,
|
||||
fallbackURL: filestoreURL,
|
||||
diff --git a/services/web/app/src/infrastructure/Features.js b/services/web/app/src/infrastructure/Features.js
|
||||
index aaf51103b9b8..89c8e6b841d0 100644
|
||||
--- a/services/web/app/src/infrastructure/Features.js
|
||||
+++ b/services/web/app/src/infrastructure/Features.js
|
||||
@@ -19,8 +19,7 @@ const trackChangesModuleAvailable =
|
||||
* @property {boolean | undefined} enableGithubSync
|
||||
* @property {boolean | undefined} enableGitBridge
|
||||
* @property {boolean | undefined} enableHomepage
|
||||
- * @property {boolean | undefined} enableProjectHistoryBlobs
|
||||
- * @property {boolean | undefined} disableFilestore
|
||||
+ * @property {number} filestoreMigrationLevel
|
||||
* @property {boolean | undefined} enableSaml
|
||||
* @property {boolean | undefined} ldap
|
||||
* @property {boolean | undefined} oauth
|
||||
@@ -29,7 +28,39 @@ const trackChangesModuleAvailable =
|
||||
* @property {boolean | undefined} saml
|
||||
*/
|
||||
|
||||
+/**
|
||||
+ * @return {{'project-history-blobs': boolean, filestore: boolean}}
|
||||
+ */
|
||||
+function getFilestoreMigrationOptions() {
|
||||
+ switch (Settings.filestoreMigrationLevel) {
|
||||
+ case 0:
|
||||
+ return {
|
||||
+ 'project-history-blobs': false,
|
||||
+ filestore: true,
|
||||
+ }
|
||||
+ case 1:
|
||||
+ return {
|
||||
+ 'project-history-blobs': true,
|
||||
+ filestore: true,
|
||||
+ }
|
||||
+
|
||||
+ case 2:
|
||||
+ return {
|
||||
+ 'project-history-blobs': true,
|
||||
+ filestore: false,
|
||||
+ }
|
||||
+ default:
|
||||
+ throw new Error(
|
||||
+ `invalid OVERLEAF_FILESTORE_MIGRATION_LEVEL=${Settings.filestoreMigrationLevel}, expected 0, 1 or 2`
|
||||
+ )
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
const Features = {
|
||||
+ validateSettings() {
|
||||
+ getFilestoreMigrationOptions() // throws for invalid settings
|
||||
+ },
|
||||
+
|
||||
/**
|
||||
* @returns {boolean}
|
||||
*/
|
||||
@@ -89,9 +120,9 @@ const Features = {
|
||||
Settings.enabledLinkedFileTypes.includes('url')
|
||||
)
|
||||
case 'project-history-blobs':
|
||||
- return Boolean(Settings.enableProjectHistoryBlobs)
|
||||
+ return getFilestoreMigrationOptions()['project-history-blobs']
|
||||
case 'filestore':
|
||||
- return Boolean(Settings.disableFilestore) === false
|
||||
+ return getFilestoreMigrationOptions().filestore
|
||||
case 'support':
|
||||
return supportModuleAvailable
|
||||
case 'symbol-palette':
|
||||
diff --git a/services/web/config/settings.defaults.js b/services/web/config/settings.defaults.js
|
||||
index bd0730d5d00c..4df63ebd7c6c 100644
|
||||
--- a/services/web/config/settings.defaults.js
|
||||
+++ b/services/web/config/settings.defaults.js
|
||||
@@ -440,6 +440,9 @@ module.exports = {
|
||||
','
|
||||
),
|
||||
|
||||
+ filestoreMigrationLevel:
|
||||
+ parseInt(process.env.OVERLEAF_FILESTORE_MIGRATION_LEVEL, 10) || 0,
|
||||
+
|
||||
// i18n
|
||||
// ------
|
||||
//
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index 0ccadaf5a955..2e12328e5c49 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -150,10 +150,6 @@ const CONCURRENT_BATCHES = parseInt(process.env.CONCURRENT_BATCHES || '2', 10)
|
||||
const RETRIES = parseInt(process.env.RETRIES || '10', 10)
|
||||
const RETRY_DELAY_MS = parseInt(process.env.RETRY_DELAY_MS || '100', 10)
|
||||
|
||||
-const USER_FILES_BUCKET_NAME = process.env.USER_FILES_BUCKET_NAME || ''
|
||||
-if (!USER_FILES_BUCKET_NAME) {
|
||||
- throw new Error('env var USER_FILES_BUCKET_NAME is missing')
|
||||
-}
|
||||
const RETRY_FILESTORE_404 = process.env.RETRY_FILESTORE_404 === 'true'
|
||||
const BUFFER_DIR = fs.mkdtempSync(
|
||||
process.env.BUFFER_DIR_PREFIX || '/tmp/back_fill_file_hash-'
|
||||
|
||||
diff --git a/services/web/app/src/infrastructure/Features.js b/services/web/app/src/infrastructure/Features.js
|
||||
index 89c8e6b841d0..6147e70e0faf 100644
|
||||
--- a/services/web/app/src/infrastructure/Features.js
|
||||
+++ b/services/web/app/src/infrastructure/Features.js
|
||||
@@ -28,37 +28,13 @@ const trackChangesModuleAvailable =
|
||||
* @property {boolean | undefined} saml
|
||||
*/
|
||||
|
||||
-/**
|
||||
- * @return {{'project-history-blobs': boolean, filestore: boolean}}
|
||||
- */
|
||||
-function getFilestoreMigrationOptions() {
|
||||
- switch (Settings.filestoreMigrationLevel) {
|
||||
- case 0:
|
||||
- return {
|
||||
- 'project-history-blobs': false,
|
||||
- filestore: true,
|
||||
- }
|
||||
- case 1:
|
||||
- return {
|
||||
- 'project-history-blobs': true,
|
||||
- filestore: true,
|
||||
- }
|
||||
-
|
||||
- case 2:
|
||||
- return {
|
||||
- 'project-history-blobs': true,
|
||||
- filestore: false,
|
||||
- }
|
||||
- default:
|
||||
+const Features = {
|
||||
+ validateSettings() {
|
||||
+ if (![0, 1, 2].includes(Settings.filestoreMigrationLevel)) {
|
||||
throw new Error(
|
||||
`invalid OVERLEAF_FILESTORE_MIGRATION_LEVEL=${Settings.filestoreMigrationLevel}, expected 0, 1 or 2`
|
||||
)
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-const Features = {
|
||||
- validateSettings() {
|
||||
- getFilestoreMigrationOptions() // throws for invalid settings
|
||||
+ }
|
||||
},
|
||||
|
||||
/**
|
||||
@@ -120,9 +96,9 @@ const Features = {
|
||||
Settings.enabledLinkedFileTypes.includes('url')
|
||||
)
|
||||
case 'project-history-blobs':
|
||||
- return getFilestoreMigrationOptions()['project-history-blobs']
|
||||
+ return Settings.filestoreMigrationLevel > 0
|
||||
case 'filestore':
|
||||
- return getFilestoreMigrationOptions().filestore
|
||||
+ return Settings.filestoreMigrationLevel < 2
|
||||
case 'support':
|
||||
return supportModuleAvailable
|
||||
case 'symbol-palette':
|
||||
84
server-ce/hotfix/5.5.3/pr_27240.patch
Normal file
84
server-ce/hotfix/5.5.3/pr_27240.patch
Normal file
@@ -0,0 +1,84 @@
|
||||
diff --git a/cron/deactivate-projects.sh b/cron/deactivate-projects.sh
|
||||
index fab0fbfbf667..a391f99a5bd8 100755
|
||||
--- a/cron/deactivate-projects.sh
|
||||
+++ b/cron/deactivate-projects.sh
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
-set -eux
|
||||
+set -eu
|
||||
|
||||
echo "-------------------------"
|
||||
echo "Deactivating old projects"
|
||||
diff --git a/cron/delete-projects.sh b/cron/delete-projects.sh
|
||||
index e1ea5ac5e621..7cd45771716a 100755
|
||||
--- a/cron/delete-projects.sh
|
||||
+++ b/cron/delete-projects.sh
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
-set -eux
|
||||
+set -eu
|
||||
|
||||
echo "-------------------------"
|
||||
echo "Expiring deleted projects"
|
||||
diff --git a/cron/delete-users.sh b/cron/delete-users.sh
|
||||
index fe97bffeea0b..30872ac55657 100755
|
||||
--- a/cron/delete-users.sh
|
||||
+++ b/cron/delete-users.sh
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
-set -eux
|
||||
+set -eu
|
||||
|
||||
echo "----------------------"
|
||||
echo "Expiring deleted users"
|
||||
diff --git a/cron/project-history-flush-all.sh b/cron/project-history-flush-all.sh
|
||||
index d8bbb184aa37..8fe9eea5fc55 100755
|
||||
--- a/cron/project-history-flush-all.sh
|
||||
+++ b/cron/project-history-flush-all.sh
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
-set -eux
|
||||
+set -eu
|
||||
|
||||
echo "---------------------------------"
|
||||
echo "Flush all project-history changes"
|
||||
diff --git a/cron/project-history-periodic-flush.sh b/cron/project-history-periodic-flush.sh
|
||||
index 76feae410e26..1b8efff6cc7c 100755
|
||||
--- a/cron/project-history-periodic-flush.sh
|
||||
+++ b/cron/project-history-periodic-flush.sh
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
-set -eux
|
||||
+set -eu
|
||||
|
||||
echo "--------------------------"
|
||||
echo "Flush project-history queue"
|
||||
diff --git a/cron/project-history-retry-hard.sh b/cron/project-history-retry-hard.sh
|
||||
index 651a6615f22d..df9b4703a58e 100755
|
||||
--- a/cron/project-history-retry-hard.sh
|
||||
+++ b/cron/project-history-retry-hard.sh
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
-set -eux
|
||||
+set -eu
|
||||
|
||||
echo "-----------------------------------"
|
||||
echo "Retry project-history errors (hard)"
|
||||
diff --git a/cron/project-history-retry-soft.sh b/cron/project-history-retry-soft.sh
|
||||
index 70c597021b28..cbb6e714cae7 100755
|
||||
--- a/cron/project-history-retry-soft.sh
|
||||
+++ b/cron/project-history-retry-soft.sh
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
-set -eux
|
||||
+set -eu
|
||||
|
||||
echo "-----------------------------------"
|
||||
echo "Retry project-history errors (soft)"
|
||||
76
server-ce/hotfix/5.5.3/pr_27249.patch
Normal file
76
server-ce/hotfix/5.5.3/pr_27249.patch
Normal file
@@ -0,0 +1,76 @@
|
||||
|
||||
|
||||
diff --git a/package-lock.json b/package-lock.json
|
||||
index 2b3a5868a20..d9d8285618d 100644
|
||||
--- a/package-lock.json
|
||||
+++ b/package-lock.json
|
||||
@@ -35581,6 +35581,7 @@
|
||||
"resolved": "https://registry.npmjs.org/request/-/request-2.88.2.tgz",
|
||||
"integrity": "sha512-MsvtOrfG9ZcrOwAW+Qi+F6HbD0CWXEh9ou77uOb7FM2WPhwT7smM833PzanhJLsgXjN89Ir6V2PczXNnMpwKhw==",
|
||||
"deprecated": "request has been deprecated, see https://github.com/request/request/issues/3142",
|
||||
+ "license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"aws-sign2": "~0.7.0",
|
||||
"aws4": "^1.8.0",
|
||||
@@ -35638,15 +35639,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/request/node_modules/tough-cookie": {
|
||||
- "version": "2.5.0",
|
||||
- "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.5.0.tgz",
|
||||
- "integrity": "sha512-nlLsUzgm1kfLXSXfRZMc1KLAugd4hqJHDTvc2hDIwS3mZAfMEuMbc03SujMF+GEcpaX/qboeycw6iO8JwVv2+g==",
|
||||
+ "version": "5.1.2",
|
||||
+ "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-5.1.2.tgz",
|
||||
+ "integrity": "sha512-FVDYdxtnj0G6Qm/DhNPSb8Ju59ULcup3tuJxkFb5K8Bv2pUXILbf0xZWU8PX8Ov19OXljbUyveOFwRMwkXzO+A==",
|
||||
+ "license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
- "psl": "^1.1.28",
|
||||
- "punycode": "^2.1.1"
|
||||
+ "tldts": "^6.1.32"
|
||||
},
|
||||
"engines": {
|
||||
- "node": ">=0.8"
|
||||
+ "node": ">=16"
|
||||
}
|
||||
},
|
||||
"node_modules/requestretry": {
|
||||
@@ -39612,6 +39613,24 @@
|
||||
"tlds": "bin.js"
|
||||
}
|
||||
},
|
||||
+ "node_modules/tldts": {
|
||||
+ "version": "6.1.86",
|
||||
+ "resolved": "https://registry.npmjs.org/tldts/-/tldts-6.1.86.tgz",
|
||||
+ "integrity": "sha512-WMi/OQ2axVTf/ykqCQgXiIct+mSQDFdH2fkwhPwgEwvJ1kSzZRiinb0zF2Xb8u4+OqPChmyI6MEu4EezNJz+FQ==",
|
||||
+ "license": "MIT",
|
||||
+ "dependencies": {
|
||||
+ "tldts-core": "^6.1.86"
|
||||
+ },
|
||||
+ "bin": {
|
||||
+ "tldts": "bin/cli.js"
|
||||
+ }
|
||||
+ },
|
||||
+ "node_modules/tldts-core": {
|
||||
+ "version": "6.1.86",
|
||||
+ "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-6.1.86.tgz",
|
||||
+ "integrity": "sha512-Je6p7pkk+KMzMv2XXKmAE3McmolOQFdxkKw0R8EYNr7sELW46JqnNeTX8ybPiQgvg1ymCoF8LXs5fzFaZvJPTA==",
|
||||
+ "license": "MIT"
|
||||
+ },
|
||||
"node_modules/tmp": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.3.tgz",
|
||||
diff --git a/package.json b/package.json
|
||||
index 388b750c3d2..44fffc4664a 100644
|
||||
--- a/package.json
|
||||
+++ b/package.json
|
||||
@@ -33,6 +33,9 @@
|
||||
"multer": "2.0.1",
|
||||
"path-to-regexp": "3.3.0",
|
||||
"qs": "6.13.0"
|
||||
+ },
|
||||
+ "request@2.88.2": {
|
||||
+ "tough-cookie": "5.1.2"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
|
||||
1469
server-ce/hotfix/5.5.3/pr_27257.patch
Normal file
1469
server-ce/hotfix/5.5.3/pr_27257.patch
Normal file
File diff suppressed because it is too large
Load Diff
82
server-ce/hotfix/5.5.3/pr_27273.patch
Normal file
82
server-ce/hotfix/5.5.3/pr_27273.patch
Normal file
@@ -0,0 +1,82 @@
|
||||
|
||||
|
||||
diff --git a/services/web/frontend/js/features/review-panel-new/components/review-tooltip-menu.tsx b/services/web/frontend/js/features/review-panel-new/components/review-tooltip-menu.tsx
|
||||
index f26542ebe909..fb6b68460bdc 100644
|
||||
--- a/services/web/frontend/js/features/review-panel-new/components/review-tooltip-menu.tsx
|
||||
+++ b/services/web/frontend/js/features/review-panel-new/components/review-tooltip-menu.tsx
|
||||
@@ -18,7 +18,6 @@ import {
|
||||
reviewTooltipStateField,
|
||||
} from '@/features/source-editor/extensions/review-tooltip'
|
||||
import { EditorView, getTooltip } from '@codemirror/view'
|
||||
-import useViewerPermissions from '@/shared/hooks/use-viewer-permissions'
|
||||
import usePreviousValue from '@/shared/hooks/use-previous-value'
|
||||
import { useLayoutContext } from '@/shared/context/layout-context'
|
||||
import { useReviewPanelViewActionsContext } from '../context/review-panel-view-context'
|
||||
@@ -35,6 +34,7 @@ import { useEditorPropertiesContext } from '@/features/ide-react/context/editor-
|
||||
import classNames from 'classnames'
|
||||
import useEventListener from '@/shared/hooks/use-event-listener'
|
||||
import useReviewPanelLayout from '../hooks/use-review-panel-layout'
|
||||
+import { usePermissionsContext } from '@/features/ide-react/context/permissions-context'
|
||||
|
||||
const EDIT_MODE_SWITCH_WIDGET_HEIGHT = 40
|
||||
const CM_LINE_RIGHT_PADDING = 8
|
||||
@@ -43,7 +43,7 @@ const TOOLTIP_SHOW_DELAY = 120
|
||||
const ReviewTooltipMenu: FC = () => {
|
||||
const state = useCodeMirrorStateContext()
|
||||
const view = useCodeMirrorViewContext()
|
||||
- const isViewer = useViewerPermissions()
|
||||
+ const permissions = usePermissionsContext()
|
||||
const [show, setShow] = useState(true)
|
||||
const { setView } = useReviewPanelViewActionsContext()
|
||||
const { openReviewPanel } = useReviewPanelLayout()
|
||||
@@ -58,7 +58,7 @@ const ReviewTooltipMenu: FC = () => {
|
||||
|
||||
const addComment = useCallback(() => {
|
||||
const { main } = view.state.selection
|
||||
- if (main.empty) {
|
||||
+ if (main.empty || !permissions.comment) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -74,11 +74,11 @@ const ReviewTooltipMenu: FC = () => {
|
||||
|
||||
view.dispatch({ effects })
|
||||
setShow(false)
|
||||
- }, [openReviewPanel, setView, setShow, view])
|
||||
+ }, [view, permissions.comment, openReviewPanel, setView])
|
||||
|
||||
useEventListener('add-new-review-comment', addComment)
|
||||
|
||||
- if (isViewer || !show || !tooltipState) {
|
||||
+ if (!permissions.comment || !show || !tooltipState) {
|
||||
return null
|
||||
}
|
||||
|
||||
diff --git a/services/web/frontend/js/features/source-editor/components/toolbar/toolbar-items.tsx b/services/web/frontend/js/features/source-editor/components/toolbar/toolbar-items.tsx
|
||||
index 3404976d4462..1811ccc99950 100644
|
||||
--- a/services/web/frontend/js/features/source-editor/components/toolbar/toolbar-items.tsx
|
||||
+++ b/services/web/frontend/js/features/source-editor/components/toolbar/toolbar-items.tsx
|
||||
@@ -16,5 +16,6 @@ import { isSplitTestEnabled } from '@/utils/splitTestUtils'
|
||||
import { isMac } from '@/shared/utils/os'
|
||||
import { useProjectContext } from '@/shared/context/project-context'
|
||||
+import { usePermissionsContext } from '@/features/ide-react/context/permissions-context'
|
||||
|
||||
export const ToolbarItems: FC<{
|
||||
state: EditorState
|
||||
@@ -35,6 +36,7 @@ export const ToolbarItems: FC<{
|
||||
useEditorPropertiesContext()
|
||||
const { writefullInstance } = useEditorContext()
|
||||
const { features } = useProjectContext()
|
||||
+ const permissions = usePermissionsContext()
|
||||
const isActive = withinFormattingCommand(state)
|
||||
|
||||
const symbolPaletteAvailable = getMeta('ol-symbolPaletteAvailable')
|
||||
@@ -131,7 +133,7 @@ export const ToolbarItems: FC<{
|
||||
command={commands.wrapInHref}
|
||||
icon="add_link"
|
||||
/>
|
||||
- {features.trackChangesVisible && (
|
||||
+ {features.trackChangesVisible && permissions.comment && (
|
||||
<ToolbarButton
|
||||
id="toolbar-add-comment"
|
||||
label={t('add_comment')}
|
||||
673
server-ce/hotfix/5.5.3/pr_27397.patch
Normal file
673
server-ce/hotfix/5.5.3/pr_27397.patch
Normal file
@@ -0,0 +1,673 @@
|
||||
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index c0fdda35d8f..09212d426e3 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -83,7 +83,7 @@ ObjectId.cacheHexString = true
|
||||
*/
|
||||
function parseArgs() {
|
||||
const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
|
||||
- const DEFAULT_OUTPUT_FILE = `file-migration-${new Date()
|
||||
+ const DEFAULT_OUTPUT_FILE = `/var/log/overleaf/file-migration-${new Date()
|
||||
.toISOString()
|
||||
.replace(/[:.]/g, '_')}.log`
|
||||
|
||||
@@ -208,7 +208,7 @@ is equivalent to
|
||||
PROCESS_HASHED_FILES: !args['skip-hashed-files'],
|
||||
PROCESS_BLOBS: !args['skip-existing-blobs'],
|
||||
DRY_RUN: args['dry-run'],
|
||||
- OUTPUT_FILE: args.output,
|
||||
+ OUTPUT_FILE: args.report ? '-' : args.output,
|
||||
BATCH_RANGE_START,
|
||||
BATCH_RANGE_END,
|
||||
LOGGING_IDENTIFIER: args['logging-id'] || BATCH_RANGE_START,
|
||||
@@ -256,6 +256,9 @@ const LOGGING_INTERVAL = parseInt(process.env.LOGGING_INTERVAL || '60000', 10)
|
||||
const SLEEP_BEFORE_EXIT = parseInt(process.env.SLEEP_BEFORE_EXIT || '1000', 10)
|
||||
|
||||
// Log output to a file
|
||||
+if (OUTPUT_FILE !== '-') {
|
||||
+ console.warn(`Writing logs into ${OUTPUT_FILE}`)
|
||||
+}
|
||||
logger.initialize('file-migration', {
|
||||
streams: [
|
||||
{
|
||||
diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
index f6f4a6fb76d..c661ae9bc3f 100644
|
||||
--- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
+++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
@@ -501,6 +501,7 @@ describe('back_fill_file_hash script', function () {
|
||||
timeout: TIMEOUT - 500,
|
||||
env: {
|
||||
...process.env,
|
||||
+ AWS_SDK_JS_SUPPRESS_MAINTENANCE_MODE_MESSAGE: '1',
|
||||
USER_FILES_BUCKET_NAME,
|
||||
SLEEP_BEFORE_EXIT: '1',
|
||||
...env,
|
||||
@@ -516,6 +517,7 @@ describe('back_fill_file_hash script', function () {
|
||||
}
|
||||
result = { stdout, stderr, status: code }
|
||||
}
|
||||
+ // Ensure no tmp folder is left behind.
|
||||
expect((await fs.promises.readdir('/tmp')).join(';')).to.not.match(
|
||||
/back_fill_file_hash/
|
||||
)
|
||||
|
||||
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index 09212d426e3..de4fca51db4 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -1373,7 +1373,18 @@ async function main() {
|
||||
console.warn('Done.')
|
||||
}
|
||||
|
||||
+async function cleanupBufferDir() {
|
||||
+ try {
|
||||
+ // Perform non-recursive removal of the BUFFER_DIR. Individual files
|
||||
+ // should get removed in parallel as part of batch processing.
|
||||
+ await fs.promises.rmdir(BUFFER_DIR)
|
||||
+ } catch (err) {
|
||||
+ console.error(`cleanup of BUFFER_DIR=${BUFFER_DIR} failed`, err)
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
if (DISPLAY_REPORT) {
|
||||
+ await cleanupBufferDir()
|
||||
console.warn('Displaying report...')
|
||||
await displayReport()
|
||||
process.exit(0)
|
||||
@@ -1384,13 +1395,7 @@ try {
|
||||
await main()
|
||||
} finally {
|
||||
printStats(true)
|
||||
- try {
|
||||
- // Perform non-recursive removal of the BUFFER_DIR. Individual files
|
||||
- // should get removed in parallel as part of batch processing.
|
||||
- await fs.promises.rmdir(BUFFER_DIR)
|
||||
- } catch (err) {
|
||||
- console.error(`cleanup of BUFFER_DIR=${BUFFER_DIR} failed`, err)
|
||||
- }
|
||||
+ await cleanupBufferDir()
|
||||
}
|
||||
|
||||
let code = 0
|
||||
|
||||
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index de4fca51db4..e9a7721944c 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -316,6 +316,7 @@ async function getStatsForCollection(
|
||||
projectsWithAllHashes: 0,
|
||||
fileCount: 0,
|
||||
fileWithHashCount: 0,
|
||||
+ fileMissingInHistoryCount: 0,
|
||||
}
|
||||
// Pick a random sample of projects and estimate the number of files without hashes
|
||||
const result = await collection
|
||||
@@ -342,25 +343,43 @@ async function getStatsForCollection(
|
||||
const filesWithoutHash = fileTree.match(/\{"_id":"[0-9a-f]{24}"\}/g) || []
|
||||
// count the number of files with a hash, these are uniquely identified
|
||||
// by the number of "hash" strings due to the filtering
|
||||
- const filesWithHash = fileTree.match(/"hash"/g) || []
|
||||
+ const filesWithHash = fileTree.match(/"hash":"[0-9a-f]{40}"/g) || []
|
||||
stats.fileCount += filesWithoutHash.length + filesWithHash.length
|
||||
stats.fileWithHashCount += filesWithHash.length
|
||||
stats.projectCount++
|
||||
stats.projectsWithAllHashes += filesWithoutHash.length === 0 ? 1 : 0
|
||||
+ const projectId = project._id.toString()
|
||||
+ const { blobs: perProjectBlobs } = await getProjectBlobsBatch([projectId])
|
||||
+ const blobs = new Set(
|
||||
+ (perProjectBlobs.get(projectId) || []).map(b => b.getHash())
|
||||
+ )
|
||||
+ const uniqueHashes = new Set(filesWithHash.map(m => m.slice(8, 48)))
|
||||
+ for (const hash of uniqueHashes) {
|
||||
+ if (blobs.has(hash) || GLOBAL_BLOBS.has(hash)) continue
|
||||
+ stats.fileMissingInHistoryCount++
|
||||
+ }
|
||||
}
|
||||
console.log(`Sampled stats for ${name}:`)
|
||||
const fractionSampled = stats.projectCount / collectionCount
|
||||
- const percentageSampled = (fractionSampled * 100).toFixed(1)
|
||||
+ const percentageSampled = (fractionSampled * 100).toFixed(0)
|
||||
const fractionConverted = stats.projectsWithAllHashes / stats.projectCount
|
||||
- const percentageConverted = (fractionConverted * 100).toFixed(1)
|
||||
+ const percentageConverted = (fractionConverted * 100).toFixed(0)
|
||||
+ const fractionMissing = stats.fileMissingInHistoryCount / stats.fileCount
|
||||
+ const percentageMissing = (fractionMissing * 100).toFixed(0)
|
||||
console.log(
|
||||
- `- Sampled ${name}: ${stats.projectCount} (${percentageSampled}%)`
|
||||
+ `- Sampled ${name}: ${stats.projectCount} (${percentageSampled}% of all ${name})`
|
||||
)
|
||||
console.log(
|
||||
`- Sampled ${name} with all hashes present: ${stats.projectsWithAllHashes}`
|
||||
)
|
||||
console.log(
|
||||
- `- Percentage of ${name} converted: ${percentageConverted}% (estimated)`
|
||||
+ `- Percentage of ${name} that need back-filling hashes: ${percentageConverted}% (estimated)`
|
||||
+ )
|
||||
+ console.log(
|
||||
+ `- Sampled ${name} have ${stats.fileCount} files that need to be checked against the full project history system.`
|
||||
+ )
|
||||
+ console.log(
|
||||
+ `- Sampled ${name} have ${stats.fileMissingInHistoryCount} files that need to be uploaded to the full project history system (estimating ${percentageMissing}% of all files).`
|
||||
)
|
||||
}
|
||||
|
||||
@@ -369,13 +388,15 @@ async function getStatsForCollection(
|
||||
* including counts and estimated progress based on a sample.
|
||||
*/
|
||||
async function displayReport() {
|
||||
- const projectsCountResult = await projectsCollection.countDocuments()
|
||||
+ const projectsCountResult = await projectsCollection.estimatedDocumentCount()
|
||||
const deletedProjectsCountResult =
|
||||
- await deletedProjectsCollection.countDocuments()
|
||||
+ await deletedProjectsCollection.estimatedDocumentCount()
|
||||
const sampleSize = 1000
|
||||
console.log('Current status:')
|
||||
- console.log(`- Projects: ${projectsCountResult}`)
|
||||
- console.log(`- Deleted projects: ${deletedProjectsCountResult}`)
|
||||
+ console.log(`- Total number of projects: ${projectsCountResult}`)
|
||||
+ console.log(
|
||||
+ `- Total number of deleted projects: ${deletedProjectsCountResult}`
|
||||
+ )
|
||||
console.log(`Sampling ${sampleSize} projects to estimate progress...`)
|
||||
await getStatsForCollection(
|
||||
sampleSize,
|
||||
diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
index c661ae9bc3f..7248e74cb3f 100644
|
||||
--- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
+++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
@@ -481,21 +481,14 @@ describe('back_fill_file_hash script', function () {
|
||||
/**
|
||||
* @param {Array<string>} args
|
||||
* @param {Record<string, string>} env
|
||||
- * @param {boolean} shouldHaveWritten
|
||||
- * @return {Promise<{result, stats: any}>}
|
||||
+ * @return {Promise<{result: { stdout: string, stderr: string, status: number }, stats: any}>}
|
||||
*/
|
||||
- async function tryRunScript(args = [], env = {}, shouldHaveWritten) {
|
||||
+ async function rawRunScript(args = [], env = {}) {
|
||||
let result
|
||||
try {
|
||||
result = await promisify(execFile)(
|
||||
process.argv0,
|
||||
- [
|
||||
- 'storage/scripts/back_fill_file_hash.mjs',
|
||||
- '--output=-',
|
||||
- '--projects',
|
||||
- '--deleted-projects',
|
||||
- ...args,
|
||||
- ],
|
||||
+ ['storage/scripts/back_fill_file_hash.mjs', ...args],
|
||||
{
|
||||
encoding: 'utf-8',
|
||||
timeout: TIMEOUT - 500,
|
||||
@@ -521,6 +514,20 @@ describe('back_fill_file_hash script', function () {
|
||||
expect((await fs.promises.readdir('/tmp')).join(';')).to.not.match(
|
||||
/back_fill_file_hash/
|
||||
)
|
||||
+ return result
|
||||
+ }
|
||||
+
|
||||
+ /**
|
||||
+ * @param {Array<string>} args
|
||||
+ * @param {Record<string, string>} env
|
||||
+ * @param {boolean} shouldHaveWritten
|
||||
+ * @return {Promise<{result, stats: any}>}
|
||||
+ */
|
||||
+ async function tryRunScript(args = [], env = {}, shouldHaveWritten) {
|
||||
+ const result = await rawRunScript(
|
||||
+ ['--output=-', '--projects', '--deleted-projects', ...args],
|
||||
+ env
|
||||
+ )
|
||||
const extraStatsKeys = ['eventLoop', 'readFromGCSThroughputMiBPerSecond']
|
||||
const stats = JSON.parse(
|
||||
result.stderr
|
||||
@@ -1078,6 +1085,35 @@ describe('back_fill_file_hash script', function () {
|
||||
})
|
||||
commonAssertions(true)
|
||||
})
|
||||
+ describe('report mode', function () {
|
||||
+ let output
|
||||
+ before('prepare environment', prepareEnvironment)
|
||||
+ before('run script', async function () {
|
||||
+ output = await rawRunScript(['--report'], {})
|
||||
+ })
|
||||
+ it('should print the report', () => {
|
||||
+ expect(output.status).to.equal(0)
|
||||
+ console.log(output.stdout)
|
||||
+ expect(output.stdout).to.equal(`\
|
||||
+Current status:
|
||||
+- Total number of projects: 10
|
||||
+- Total number of deleted projects: 5
|
||||
+Sampling 1000 projects to estimate progress...
|
||||
+Sampled stats for projects:
|
||||
+- Sampled projects: 9 (90% of all projects)
|
||||
+- Sampled projects with all hashes present: 5
|
||||
+- Percentage of projects that need back-filling hashes: 56% (estimated)
|
||||
+- Sampled projects have 11 files that need to be checked against the full project history system.
|
||||
+- Sampled projects have 3 files that need to be uploaded to the full project history system (estimating 27% of all files).
|
||||
+Sampled stats for deleted projects:
|
||||
+- Sampled deleted projects: 4 (80% of all deleted projects)
|
||||
+- Sampled deleted projects with all hashes present: 3
|
||||
+- Percentage of deleted projects that need back-filling hashes: 75% (estimated)
|
||||
+- Sampled deleted projects have 2 files that need to be checked against the full project history system.
|
||||
+- Sampled deleted projects have 1 files that need to be uploaded to the full project history system (estimating 50% of all files).
|
||||
+`)
|
||||
+ })
|
||||
+ })
|
||||
|
||||
describe('full run in dry-run mode', function () {
|
||||
let output
|
||||
|
||||
|
||||
|
||||
diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
index e9a7721944c..9c2a9818680 100644
|
||||
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
|
||||
@@ -79,7 +79,7 @@ ObjectId.cacheHexString = true
|
||||
*/
|
||||
|
||||
/**
|
||||
- * @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean, PROCESS_BLOBS: boolean, DRY_RUN: boolean, OUTPUT_FILE: string, DISPLAY_REPORT: boolean}}
|
||||
+ * @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean, PROCESS_BLOBS: boolean, DRY_RUN: boolean, OUTPUT_FILE: string, DISPLAY_REPORT: boolean, CONCURRENCY: number, CONCURRENT_BATCHES: number, RETRIES: number, RETRY_DELAY_MS: number, RETRY_FILESTORE_404: boolean, BUFFER_DIR_PREFIX: string, STREAM_HIGH_WATER_MARK: number, LOGGING_INTERVAL: number, SLEEP_BEFORE_EXIT: number }}
|
||||
*/
|
||||
function parseArgs() {
|
||||
const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
|
||||
@@ -95,6 +95,12 @@ function parseArgs() {
|
||||
{ name: 'skip-hashed-files', type: Boolean },
|
||||
{ name: 'skip-existing-blobs', type: Boolean },
|
||||
{ name: 'from-file', type: String, defaultValue: '' },
|
||||
+ { name: 'concurrency', type: Number, defaultValue: 10 },
|
||||
+ { name: 'concurrent-batches', type: Number, defaultValue: 1 },
|
||||
+ { name: 'stream-high-water-mark', type: Number, defaultValue: 1024 * 1024 },
|
||||
+ { name: 'retries', type: Number, defaultValue: 10 },
|
||||
+ { name: 'retry-delay-ms', type: Number, defaultValue: 100 },
|
||||
+ { name: 'retry-filestore-404', type: Boolean },
|
||||
{ name: 'dry-run', alias: 'n', type: Boolean },
|
||||
{
|
||||
name: 'output',
|
||||
@@ -114,6 +120,13 @@ function parseArgs() {
|
||||
defaultValue: new Date().toISOString(),
|
||||
},
|
||||
{ name: 'logging-id', type: String, defaultValue: '' },
|
||||
+ { name: 'logging-interval-ms', type: Number, defaultValue: 60_000 },
|
||||
+ {
|
||||
+ name: 'buffer-dir-prefix',
|
||||
+ type: String,
|
||||
+ defaultValue: '/tmp/back_fill_file_hash-',
|
||||
+ },
|
||||
+ { name: 'sleep-before-exit-ms', type: Number, defaultValue: 1_000 },
|
||||
])
|
||||
|
||||
// If no arguments are provided, display a usage message
|
||||
@@ -143,6 +156,8 @@ Logging options:
|
||||
(default: file-migration-<timestamp>.log)
|
||||
--logging-id <id> Identifier for logging
|
||||
(default: BATCH_RANGE_START)
|
||||
+ --logging-interval-ms <ms> Interval for logging progres stats
|
||||
+ (default: 60000, 1min)
|
||||
|
||||
Batch range options:
|
||||
--BATCH_RANGE_START <date> Start date for processing
|
||||
@@ -150,10 +165,30 @@ Batch range options:
|
||||
--BATCH_RANGE_END <date> End date for processing
|
||||
(default: ${args.BATCH_RANGE_END})
|
||||
|
||||
+Concurrency:
|
||||
+ --concurrency <n> Number of files to process concurrently
|
||||
+ (default: 10)
|
||||
+ --concurrent-batches <n> Number of project batches to process concurrently
|
||||
+ (default: 1)
|
||||
+ --stream-high-water-mark n In-Memory buffering threshold
|
||||
+ (default: 1MiB)
|
||||
+
|
||||
+Retries:
|
||||
+ --retries <n> Number of times to retry processing a file
|
||||
+ (default: 10)
|
||||
+ --retry-delay-ms <ms> How long to wait before processing a file again
|
||||
+ (default: 100, 100ms)
|
||||
+ --retry-filestore-404 Retry downloading a file when receiving a 404
|
||||
+ (default: false)
|
||||
+
|
||||
Other options:
|
||||
--report Display a report of the current status
|
||||
--dry-run, -n Perform a dry run without making changes
|
||||
--help, -h Show this help message
|
||||
+ --buffer-dir-prefix <p> Folder/prefix for buffering files on disk
|
||||
+ (default: ${args['buffer-dir-prefix']})
|
||||
+ --sleep-before-exit-ms <n> Defer exiting from the script
|
||||
+ (default: 1000, 1s)
|
||||
|
||||
Typical usage:
|
||||
|
||||
@@ -212,8 +247,17 @@ is equivalent to
|
||||
BATCH_RANGE_START,
|
||||
BATCH_RANGE_END,
|
||||
LOGGING_IDENTIFIER: args['logging-id'] || BATCH_RANGE_START,
|
||||
+ LOGGING_INTERVAL: args['logging-interval-ms'],
|
||||
PROJECT_IDS_FROM: args['from-file'],
|
||||
DISPLAY_REPORT: args.report,
|
||||
+ CONCURRENCY: args.concurrency,
|
||||
+ CONCURRENT_BATCHES: args['concurrent-batches'],
|
||||
+ STREAM_HIGH_WATER_MARK: args['stream-high-water-mark'],
|
||||
+ RETRIES: args.retries,
|
||||
+ RETRY_DELAY_MS: args['retry-delay-ms'],
|
||||
+ RETRY_FILESTORE_404: args['retry-filestore-404'],
|
||||
+ BUFFER_DIR_PREFIX: args['buffer-dir-prefix'],
|
||||
+ SLEEP_BEFORE_EXIT: args['sleep-before-exit-ms'],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -229,6 +273,15 @@ const {
|
||||
LOGGING_IDENTIFIER,
|
||||
PROJECT_IDS_FROM,
|
||||
DISPLAY_REPORT,
|
||||
+ CONCURRENCY,
|
||||
+ CONCURRENT_BATCHES,
|
||||
+ RETRIES,
|
||||
+ RETRY_DELAY_MS,
|
||||
+ RETRY_FILESTORE_404,
|
||||
+ BUFFER_DIR_PREFIX,
|
||||
+ STREAM_HIGH_WATER_MARK,
|
||||
+ LOGGING_INTERVAL,
|
||||
+ SLEEP_BEFORE_EXIT,
|
||||
} = parseArgs()
|
||||
|
||||
// We need to handle the start and end differently as ids of deleted projects are created at time of deletion.
|
||||
@@ -236,24 +289,7 @@ if (process.env.BATCH_RANGE_START || process.env.BATCH_RANGE_END) {
|
||||
throw new Error('use --BATCH_RANGE_START and --BATCH_RANGE_END')
|
||||
}
|
||||
|
||||
-// Concurrency for downloading from GCS and updating hashes in mongo
|
||||
-const CONCURRENCY = parseInt(process.env.CONCURRENCY || '100', 10)
|
||||
-const CONCURRENT_BATCHES = parseInt(process.env.CONCURRENT_BATCHES || '2', 10)
|
||||
-// Retries for processing a given file
|
||||
-const RETRIES = parseInt(process.env.RETRIES || '10', 10)
|
||||
-const RETRY_DELAY_MS = parseInt(process.env.RETRY_DELAY_MS || '100', 10)
|
||||
-
|
||||
-const RETRY_FILESTORE_404 = process.env.RETRY_FILESTORE_404 === 'true'
|
||||
-const BUFFER_DIR = fs.mkdtempSync(
|
||||
- process.env.BUFFER_DIR_PREFIX || '/tmp/back_fill_file_hash-'
|
||||
-)
|
||||
-// https://nodejs.org/api/stream.html#streamgetdefaulthighwatermarkobjectmode
|
||||
-const STREAM_HIGH_WATER_MARK = parseInt(
|
||||
- process.env.STREAM_HIGH_WATER_MARK || (64 * 1024).toString(),
|
||||
- 10
|
||||
-)
|
||||
-const LOGGING_INTERVAL = parseInt(process.env.LOGGING_INTERVAL || '60000', 10)
|
||||
-const SLEEP_BEFORE_EXIT = parseInt(process.env.SLEEP_BEFORE_EXIT || '1000', 10)
|
||||
+const BUFFER_DIR = fs.mkdtempSync(BUFFER_DIR_PREFIX)
|
||||
|
||||
// Log output to a file
|
||||
if (OUTPUT_FILE !== '-') {
|
||||
@@ -416,7 +452,7 @@ async function displayReport() {
|
||||
)
|
||||
}
|
||||
|
||||
-// Filestore endpoint location
|
||||
+// Filestore endpoint location (configured by /etc/overleaf/env.sh)
|
||||
const FILESTORE_HOST = process.env.FILESTORE_HOST || '127.0.0.1'
|
||||
const FILESTORE_PORT = process.env.FILESTORE_PORT || '3009'
|
||||
|
||||
diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
index 7248e74cb3f..601cea13b6a 100644
|
||||
--- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
+++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
|
||||
@@ -61,9 +61,8 @@ function objectIdFromTime(timestamp) {
|
||||
|
||||
const PRINT_IDS_AND_HASHES_FOR_DEBUGGING = false
|
||||
|
||||
-describe('back_fill_file_hash script', function () {
|
||||
+describe.only('back_fill_file_hash script', function () {
|
||||
this.timeout(TIMEOUT)
|
||||
- const USER_FILES_BUCKET_NAME = 'fake-user-files-gcs'
|
||||
|
||||
const projectId0 = objectIdFromTime('2017-01-01T00:00:00Z')
|
||||
const projectId1 = objectIdFromTime('2017-01-01T00:01:00Z')
|
||||
@@ -480,24 +479,24 @@ describe('back_fill_file_hash script', function () {
|
||||
|
||||
/**
|
||||
* @param {Array<string>} args
|
||||
- * @param {Record<string, string>} env
|
||||
* @return {Promise<{result: { stdout: string, stderr: string, status: number }, stats: any}>}
|
||||
*/
|
||||
- async function rawRunScript(args = [], env = {}) {
|
||||
+ async function rawRunScript(args = []) {
|
||||
let result
|
||||
try {
|
||||
result = await promisify(execFile)(
|
||||
process.argv0,
|
||||
- ['storage/scripts/back_fill_file_hash.mjs', ...args],
|
||||
+ [
|
||||
+ 'storage/scripts/back_fill_file_hash.mjs',
|
||||
+ '--sleep-before-exit-ms=1',
|
||||
+ ...args,
|
||||
+ ],
|
||||
{
|
||||
encoding: 'utf-8',
|
||||
timeout: TIMEOUT - 500,
|
||||
env: {
|
||||
...process.env,
|
||||
AWS_SDK_JS_SUPPRESS_MAINTENANCE_MODE_MESSAGE: '1',
|
||||
- USER_FILES_BUCKET_NAME,
|
||||
- SLEEP_BEFORE_EXIT: '1',
|
||||
- ...env,
|
||||
LOG_LEVEL: 'warn', // Override LOG_LEVEL of acceptance tests
|
||||
},
|
||||
}
|
||||
@@ -519,15 +518,16 @@ describe('back_fill_file_hash script', function () {
|
||||
|
||||
/**
|
||||
* @param {Array<string>} args
|
||||
- * @param {Record<string, string>} env
|
||||
* @param {boolean} shouldHaveWritten
|
||||
* @return {Promise<{result, stats: any}>}
|
||||
*/
|
||||
- async function tryRunScript(args = [], env = {}, shouldHaveWritten) {
|
||||
- const result = await rawRunScript(
|
||||
- ['--output=-', '--projects', '--deleted-projects', ...args],
|
||||
- env
|
||||
- )
|
||||
+ async function tryRunScript(args = [], shouldHaveWritten) {
|
||||
+ const result = await rawRunScript([
|
||||
+ '--output=-',
|
||||
+ '--projects',
|
||||
+ '--deleted-projects',
|
||||
+ ...args,
|
||||
+ ])
|
||||
const extraStatsKeys = ['eventLoop', 'readFromGCSThroughputMiBPerSecond']
|
||||
const stats = JSON.parse(
|
||||
result.stderr
|
||||
@@ -558,12 +558,11 @@ describe('back_fill_file_hash script', function () {
|
||||
|
||||
/**
|
||||
* @param {Array<string>} args
|
||||
- * @param {Record<string, string>} env
|
||||
* @param {boolean} shouldHaveWritten
|
||||
* @return {Promise<{result, stats: any}>}
|
||||
*/
|
||||
- async function runScript(args = [], env = {}, shouldHaveWritten = true) {
|
||||
- const { stats, result } = await tryRunScript(args, env, shouldHaveWritten)
|
||||
+ async function runScript(args = [], shouldHaveWritten = true) {
|
||||
+ const { stats, result } = await tryRunScript(args, shouldHaveWritten)
|
||||
if (result.status !== 0) {
|
||||
console.log(result)
|
||||
expect(result).to.have.property('status', 0)
|
||||
@@ -812,7 +811,6 @@ describe('back_fill_file_hash script', function () {
|
||||
it('should process nothing on re-run', async function () {
|
||||
const rerun = await runScript(
|
||||
!processHashedFiles ? ['--skip-hashed-files'] : [],
|
||||
- {},
|
||||
false
|
||||
)
|
||||
let stats = {
|
||||
@@ -937,10 +935,11 @@ describe('back_fill_file_hash script', function () {
|
||||
it('should gracefully handle fatal errors', async function () {
|
||||
mockFilestore.deleteObject(projectId0, fileId0)
|
||||
const t0 = Date.now()
|
||||
- const { stats, result } = await tryRunScript(['--skip-hashed-files'], {
|
||||
- RETRIES: '10',
|
||||
- RETRY_DELAY_MS: '1000',
|
||||
- })
|
||||
+ const { stats, result } = await tryRunScript([
|
||||
+ '--skip-hashed-files',
|
||||
+ '--retries=10',
|
||||
+ '--retry-delay-ms=1000',
|
||||
+ ])
|
||||
const t1 = Date.now()
|
||||
expectNotFoundError(result, 'failed to process file')
|
||||
expect(result.status).to.equal(1)
|
||||
@@ -972,11 +971,12 @@ describe('back_fill_file_hash script', function () {
|
||||
value: { stats, result },
|
||||
},
|
||||
] = await Promise.allSettled([
|
||||
- tryRunScript(['--skip-hashed-files'], {
|
||||
- RETRY_DELAY_MS: '100',
|
||||
- RETRIES: '60',
|
||||
- RETRY_FILESTORE_404: 'true', // 404s are the easiest to simulate in tests
|
||||
- }),
|
||||
+ tryRunScript([
|
||||
+ '--skip-hashed-files',
|
||||
+ '--retries=60',
|
||||
+ '--retry-delay-ms=1000',
|
||||
+ '--retry-filestore-404',
|
||||
+ ]),
|
||||
restoreFileAfter5s(),
|
||||
])
|
||||
expectNotFoundError(result, 'failed to process file, trying again')
|
||||
@@ -998,9 +998,7 @@ describe('back_fill_file_hash script', function () {
|
||||
let output
|
||||
before('prepare environment', prepareEnvironment)
|
||||
before('run script', async function () {
|
||||
- output = await runScript(['--skip-hashed-files'], {
|
||||
- CONCURRENCY: '1',
|
||||
- })
|
||||
+ output = await runScript(['--skip-hashed-files', '--concurrency=1'])
|
||||
})
|
||||
|
||||
/**
|
||||
@@ -1067,10 +1065,10 @@ describe('back_fill_file_hash script', function () {
|
||||
let output1, output2
|
||||
before('prepare environment', prepareEnvironment)
|
||||
before('run script without hashed files', async function () {
|
||||
- output1 = await runScript(['--skip-hashed-files'], {})
|
||||
+ output1 = await runScript(['--skip-hashed-files'])
|
||||
})
|
||||
before('run script with hashed files', async function () {
|
||||
- output2 = await runScript([], {})
|
||||
+ output2 = await runScript([])
|
||||
})
|
||||
it('should print stats for the first run without hashed files', function () {
|
||||
expect(output1.stats).deep.equal(STATS_ALL)
|
||||
@@ -1089,7 +1087,7 @@ describe('back_fill_file_hash script', function () {
|
||||
let output
|
||||
before('prepare environment', prepareEnvironment)
|
||||
before('run script', async function () {
|
||||
- output = await rawRunScript(['--report'], {})
|
||||
+ output = await rawRunScript(['--report'])
|
||||
})
|
||||
it('should print the report', () => {
|
||||
expect(output.status).to.equal(0)
|
||||
@@ -1127,13 +1125,7 @@ Sampled stats for deleted projects:
|
||||
.toArray()
|
||||
})
|
||||
before('run script', async function () {
|
||||
- output = await runScript(
|
||||
- ['--dry-run'],
|
||||
- {
|
||||
- CONCURRENCY: '1',
|
||||
- },
|
||||
- false
|
||||
- )
|
||||
+ output = await runScript(['--dry-run', '--concurrency=1'], false)
|
||||
})
|
||||
|
||||
it('should print stats for dry-run mode', function () {
|
||||
@@ -1174,9 +1166,7 @@ Sampled stats for deleted projects:
|
||||
let output
|
||||
before('prepare environment', prepareEnvironment)
|
||||
before('run script', async function () {
|
||||
- output = await runScript(['--skip-hashed-files'], {
|
||||
- CONCURRENCY: '10',
|
||||
- })
|
||||
+ output = await runScript(['--skip-hashed-files', '--concurrency=10'])
|
||||
})
|
||||
it('should print stats', function () {
|
||||
expect(output.stats).deep.equal(STATS_ALL)
|
||||
@@ -1184,13 +1174,14 @@ Sampled stats for deleted projects:
|
||||
commonAssertions()
|
||||
})
|
||||
|
||||
- describe('full run STREAM_HIGH_WATER_MARK=1MB', function () {
|
||||
+ describe('full run STREAM_HIGH_WATER_MARK=64kiB', function () {
|
||||
let output
|
||||
before('prepare environment', prepareEnvironment)
|
||||
before('run script', async function () {
|
||||
- output = await runScript(['--skip-hashed-files'], {
|
||||
- STREAM_HIGH_WATER_MARK: (1024 * 1024).toString(),
|
||||
- })
|
||||
+ output = await runScript([
|
||||
+ '--skip-hashed-files',
|
||||
+ `--stream-high-water-mark=${64 * 1024}`,
|
||||
+ ])
|
||||
})
|
||||
it('should print stats', function () {
|
||||
expect(output.stats).deep.equal(STATS_ALL)
|
||||
@@ -1202,7 +1193,7 @@ Sampled stats for deleted projects:
|
||||
let output
|
||||
before('prepare environment', prepareEnvironment)
|
||||
before('run script', async function () {
|
||||
- output = await runScript([], {})
|
||||
+ output = await runScript([])
|
||||
})
|
||||
it('should print stats', function () {
|
||||
expect(output.stats).deep.equal(
|
||||
@@ -1231,9 +1222,7 @@ Sampled stats for deleted projects:
|
||||
})
|
||||
let output
|
||||
before('run script', async function () {
|
||||
- output = await runScript(['--skip-hashed-files'], {
|
||||
- CONCURRENCY: '1',
|
||||
- })
|
||||
+ output = await runScript(['--skip-hashed-files', '--concurrency=1'])
|
||||
})
|
||||
|
||||
it('should print stats', function () {
|
||||
@@ -1252,20 +1241,18 @@ Sampled stats for deleted projects:
|
||||
let outputPart0, outputPart1
|
||||
before('prepare environment', prepareEnvironment)
|
||||
before('run script on part 0', async function () {
|
||||
- outputPart0 = await runScript(
|
||||
- ['--skip-hashed-files', `--BATCH_RANGE_END=${edge}`],
|
||||
- {
|
||||
- CONCURRENCY: '1',
|
||||
- }
|
||||
- )
|
||||
+ outputPart0 = await runScript([
|
||||
+ '--skip-hashed-files',
|
||||
+ `--BATCH_RANGE_END=${edge}`,
|
||||
+ '--concurrency=1',
|
||||
+ ])
|
||||
})
|
||||
before('run script on part 1', async function () {
|
||||
- outputPart1 = await runScript(
|
||||
- ['--skip-hashed-files', `--BATCH_RANGE_START=${edge}`],
|
||||
- {
|
||||
- CONCURRENCY: '1',
|
||||
- }
|
||||
- )
|
||||
+ outputPart1 = await runScript([
|
||||
+ '--skip-hashed-files',
|
||||
+ `--BATCH_RANGE_START=${edge}`,
|
||||
+ '--concurrency=1',
|
||||
+ ])
|
||||
})
|
||||
|
||||
it('should print stats for part 0', function () {
|
||||
|
||||
165
server-ce/hotfix/5.5.3/pr_27476.patch-stage-2
Normal file
165
server-ce/hotfix/5.5.3/pr_27476.patch-stage-2
Normal file
@@ -0,0 +1,165 @@
|
||||
diff --git a/services/web/app/src/Features/Collaborators/OwnershipTransferHandler.js b/services/web/app/src/Features/Collaborators/OwnershipTransferHandler.js
|
||||
index e22818ebb880..81ec5ccb0aa5 100644
|
||||
--- a/services/web/app/src/Features/Collaborators/OwnershipTransferHandler.js
|
||||
+++ b/services/web/app/src/Features/Collaborators/OwnershipTransferHandler.js
|
||||
@@ -9,9 +9,75 @@ const PrivilegeLevels = require('../Authorization/PrivilegeLevels')
|
||||
const TpdsProjectFlusher = require('../ThirdPartyDataStore/TpdsProjectFlusher')
|
||||
const ProjectAuditLogHandler = require('../Project/ProjectAuditLogHandler')
|
||||
const AnalyticsManager = require('../Analytics/AnalyticsManager')
|
||||
+const OError = require('@overleaf/o-error')
|
||||
+const TagsHandler = require('../Tags/TagsHandler')
|
||||
+const { promiseMapWithLimit } = require('@overleaf/promise-utils')
|
||||
|
||||
module.exports = {
|
||||
- promises: { transferOwnership },
|
||||
+ promises: {
|
||||
+ transferOwnership,
|
||||
+ transferAllProjectsToUser,
|
||||
+ },
|
||||
+}
|
||||
+
|
||||
+const TAG_COLOR_BLUE = '#434AF0'
|
||||
+
|
||||
+/**
|
||||
+ * @param {string} fromUserId
|
||||
+ * @param {string} toUserId
|
||||
+ * @param {string} ipAddress
|
||||
+ * @return {Promise<{projectCount: number, newTagName: string}>}
|
||||
+ */
|
||||
+async function transferAllProjectsToUser({ fromUserId, toUserId, ipAddress }) {
|
||||
+ // - Verify that both users exist
|
||||
+ const fromUser = await UserGetter.promises.getUser(fromUserId, {
|
||||
+ _id: 1,
|
||||
+ email: 1,
|
||||
+ })
|
||||
+ const toUser = await UserGetter.promises.getUser(toUserId, { _id: 1 })
|
||||
+ if (!fromUser) throw new OError('missing source user', { fromUserId })
|
||||
+ if (!toUser) throw new OError('missing destination user', { toUserId })
|
||||
+ if (fromUser._id.equals(toUser._id))
|
||||
+ throw new OError('rejecting transfer between identical users', {
|
||||
+ fromUserId,
|
||||
+ toUserId,
|
||||
+ })
|
||||
+ logger.debug(
|
||||
+ { fromUserId, toUserId },
|
||||
+ 'started bulk transfer of all projects from one user to another'
|
||||
+ )
|
||||
+ // - Get all owned projects for fromUserId
|
||||
+ const projects = await Project.find({ owner_ref: fromUserId }, { _id: 1 })
|
||||
+
|
||||
+ // - Create new tag on toUserId
|
||||
+ const newTag = await TagsHandler.promises.createTag(
|
||||
+ toUserId,
|
||||
+ `transferred-from-${fromUser.email}`,
|
||||
+ TAG_COLOR_BLUE,
|
||||
+ { truncate: true }
|
||||
+ )
|
||||
+
|
||||
+ // - Add tag to projects (can happen before ownership is transferred)
|
||||
+ await TagsHandler.promises.addProjectsToTag(
|
||||
+ toUserId,
|
||||
+ newTag._id,
|
||||
+ projects.map(p => p._id)
|
||||
+ )
|
||||
+
|
||||
+ // - Transfer all projects
|
||||
+ await promiseMapWithLimit(5, projects, async project => {
|
||||
+ await transferOwnership(project._id, toUserId, {
|
||||
+ allowTransferToNonCollaborators: true,
|
||||
+ skipEmails: true,
|
||||
+ ipAddress,
|
||||
+ })
|
||||
+ })
|
||||
+
|
||||
+ logger.debug(
|
||||
+ { fromUserId, toUserId },
|
||||
+ 'finished bulk transfer of all projects from one user to another'
|
||||
+ )
|
||||
+ return { projectCount: projects.length, newTagName: newTag.name }
|
||||
}
|
||||
|
||||
async function transferOwnership(projectId, newOwnerId, options = {}) {
|
||||
@@ -74,8 +140,8 @@ async function transferOwnership(projectId, newOwnerId, options = {}) {
|
||||
await TpdsProjectFlusher.promises.flushProjectToTpds(projectId)
|
||||
|
||||
// Send confirmation emails
|
||||
- const previousOwner = await UserGetter.promises.getUser(previousOwnerId)
|
||||
if (!skipEmails) {
|
||||
+ const previousOwner = await UserGetter.promises.getUser(previousOwnerId)
|
||||
await _sendEmails(project, previousOwner, newOwner)
|
||||
}
|
||||
}
|
||||
diff --git a/services/web/modules/server-ce-scripts/scripts/transfer-all-projects-to-user.mjs b/services/web/modules/server-ce-scripts/scripts/transfer-all-projects-to-user.mjs
|
||||
new file mode 100644
|
||||
index 000000000000..6ff1215de53b
|
||||
--- /dev/null
|
||||
+++ b/services/web/modules/server-ce-scripts/scripts/transfer-all-projects-to-user.mjs
|
||||
@@ -0,0 +1,46 @@
|
||||
+import { ObjectId } from 'mongodb'
|
||||
+import minimist from 'minimist'
|
||||
+import OwnershipTransferHandler from '../../../app/src/Features/Collaborators/OwnershipTransferHandler.js'
|
||||
+import UserGetter from '../../../app/src/Features/User/UserGetter.js'
|
||||
+import EmailHelper from '../../../app/src/Features/Helpers/EmailHelper.js'
|
||||
+
|
||||
+const args = minimist(process.argv.slice(2), {
|
||||
+ string: ['from-user', 'to-user'],
|
||||
+})
|
||||
+
|
||||
+/**
|
||||
+ * @param {string} flag
|
||||
+ * @return {Promise<string>}
|
||||
+ */
|
||||
+async function resolveUser(flag) {
|
||||
+ const raw = args[flag]
|
||||
+ if (!raw) throw new Error(`missing parameter --${flag}`)
|
||||
+ if (ObjectId.isValid(raw)) return raw
|
||||
+ const email = EmailHelper.parseEmail(raw)
|
||||
+ if (!email) throw new Error(`invalid email --${flag}=${raw}`)
|
||||
+ const user = await UserGetter.promises.getUser({ email: email }, { _id: 1 })
|
||||
+ if (!user)
|
||||
+ throw new Error(`user with email --${flag}=${email} does not exist`)
|
||||
+ return user._id.toString()
|
||||
+}
|
||||
+
|
||||
+async function main() {
|
||||
+ const fromUserId = await resolveUser('from-user')
|
||||
+ const toUserId = await resolveUser('to-user')
|
||||
+ await OwnershipTransferHandler.promises.transferAllProjectsToUser({
|
||||
+ fromUserId,
|
||||
+ toUserId,
|
||||
+ ipAddress: '0.0.0.0',
|
||||
+ })
|
||||
+}
|
||||
+
|
||||
+main()
|
||||
+ .then(() => {
|
||||
+ console.error('Done.')
|
||||
+ process.exit(0)
|
||||
+ })
|
||||
+ .catch(err => {
|
||||
+ console.error('---')
|
||||
+ console.error(err)
|
||||
+ process.exit(1)
|
||||
+ })
|
||||
|
||||
|
||||
diff --git a/services/web/modules/server-ce-scripts/scripts/transfer-all-projects-to-user.mjs b/services/web/modules/server-ce-scripts/scripts/transfer-all-projects-to-user.mjs
|
||||
index 6ff1215de53b..8c5951334403 100644
|
||||
--- a/services/web/modules/server-ce-scripts/scripts/transfer-all-projects-to-user.mjs
|
||||
+++ b/services/web/modules/server-ce-scripts/scripts/transfer-all-projects-to-user.mjs
|
||||
@@ -1,4 +1,4 @@
|
||||
-import { ObjectId } from 'mongodb'
|
||||
+import { ObjectId } from '../../../app/src/infrastructure/mongodb.js'
|
||||
import minimist from 'minimist'
|
||||
import OwnershipTransferHandler from '../../../app/src/Features/Collaborators/OwnershipTransferHandler.js'
|
||||
import UserGetter from '../../../app/src/Features/User/UserGetter.js'
|
||||
@@ -18,7 +18,7 @@ async function resolveUser(flag) {
|
||||
if (ObjectId.isValid(raw)) return raw
|
||||
const email = EmailHelper.parseEmail(raw)
|
||||
if (!email) throw new Error(`invalid email --${flag}=${raw}`)
|
||||
- const user = await UserGetter.promises.getUser({ email: email }, { _id: 1 })
|
||||
+ const user = await UserGetter.promises.getUser({ email }, { _id: 1 })
|
||||
if (!user)
|
||||
throw new Error(`user with email --${flag}=${email} does not exist`)
|
||||
return user._id.toString()
|
||||
|
||||
2509
server-ce/hotfix/5.5.3/sec-npm.patch
Normal file
2509
server-ce/hotfix/5.5.3/sec-npm.patch
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user