mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-31 12:51:35 +02:00
[web] remove deledFiles collection (#25750)
* [history-v1] remove processing of deleted files when back-filling hashes * [web] remove deledFiles collection GitOrigin-RevId: 7c080e564f7d7acb33ebe7ebe012f415a847d0df
This commit is contained in:
@@ -89,14 +89,13 @@ ObjectId.cacheHexString = true
|
||||
*/
|
||||
|
||||
/**
|
||||
* @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, PROCESS_DELETED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, PROCESS_BLOBS: boolean, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean, COLLECT_BACKED_UP_BLOBS: boolean}}
|
||||
* @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, PROCESS_BLOBS: boolean, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean, COLLECT_BACKED_UP_BLOBS: boolean}}
|
||||
*/
|
||||
function parseArgs() {
|
||||
const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
|
||||
const args = commandLineArgs([
|
||||
{ name: 'processNonDeletedProjects', type: String, defaultValue: 'false' },
|
||||
{ name: 'processDeletedProjects', type: String, defaultValue: 'false' },
|
||||
{ name: 'processDeletedFiles', type: String, defaultValue: 'false' },
|
||||
{ name: 'processHashedFiles', type: String, defaultValue: 'false' },
|
||||
{ name: 'processBlobs', type: String, defaultValue: 'true' },
|
||||
{ name: 'projectIdsFrom', type: String, defaultValue: '' },
|
||||
@@ -131,7 +130,6 @@ function parseArgs() {
|
||||
PROCESS_NON_DELETED_PROJECTS: boolVal('processNonDeletedProjects'),
|
||||
PROCESS_DELETED_PROJECTS: boolVal('processDeletedProjects'),
|
||||
PROCESS_BLOBS: boolVal('processBlobs'),
|
||||
PROCESS_DELETED_FILES: boolVal('processDeletedFiles'),
|
||||
PROCESS_HASHED_FILES: boolVal('processHashedFiles'),
|
||||
COLLECT_BACKED_UP_BLOBS: boolVal('collectBackedUpBlobs'),
|
||||
BATCH_RANGE_START,
|
||||
@@ -145,7 +143,6 @@ const {
|
||||
PROCESS_NON_DELETED_PROJECTS,
|
||||
PROCESS_DELETED_PROJECTS,
|
||||
PROCESS_BLOBS,
|
||||
PROCESS_DELETED_FILES,
|
||||
PROCESS_HASHED_FILES,
|
||||
COLLECT_BACKED_UP_BLOBS,
|
||||
BATCH_RANGE_START,
|
||||
@@ -188,7 +185,6 @@ const typedProjectsCollection = db.collection('projects')
|
||||
const deletedProjectsCollection = db.collection('deletedProjects')
|
||||
/** @type {DeletedProjectsCollection} */
|
||||
const typedDeletedProjectsCollection = db.collection('deletedProjects')
|
||||
const deletedFilesCollection = db.collection('deletedFiles')
|
||||
|
||||
const concurrencyLimit = pLimit(CONCURRENCY)
|
||||
|
||||
@@ -647,22 +643,15 @@ async function queueNextBatch(batch, prefix = 'rootFolder.0') {
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
async function processBatch(batch, prefix = 'rootFolder.0') {
|
||||
const [deletedFiles, { nBlobs, blobs }, { nBackedUpBlobs, backedUpBlobs }] =
|
||||
await Promise.all([
|
||||
collectDeletedFiles(batch),
|
||||
collectProjectBlobs(batch),
|
||||
collectBackedUpBlobs(batch),
|
||||
])
|
||||
const files = Array.from(
|
||||
findFileInBatch(batch, prefix, deletedFiles, blobs, backedUpBlobs)
|
||||
)
|
||||
const [{ nBlobs, blobs }, { nBackedUpBlobs, backedUpBlobs }] =
|
||||
await Promise.all([collectProjectBlobs(batch), collectBackedUpBlobs(batch)])
|
||||
const files = Array.from(findFileInBatch(batch, prefix, blobs, backedUpBlobs))
|
||||
STATS.projects += batch.length
|
||||
STATS.blobs += nBlobs
|
||||
STATS.backedUpBlobs += nBackedUpBlobs
|
||||
|
||||
// GC
|
||||
batch.length = 0
|
||||
deletedFiles.clear()
|
||||
blobs.clear()
|
||||
backedUpBlobs.clear()
|
||||
|
||||
@@ -713,9 +702,7 @@ async function handleDeletedFileTreeBatch(batch) {
|
||||
* @return {Promise<boolean>}
|
||||
*/
|
||||
async function tryUpdateFileRefInMongo(entry) {
|
||||
if (entry.path === MONGO_PATH_DELETED_FILE) {
|
||||
return await tryUpdateDeletedFileRefInMongo(entry)
|
||||
} else if (entry.path.startsWith('project.')) {
|
||||
if (entry.path.startsWith('project.')) {
|
||||
return await tryUpdateFileRefInMongoInDeletedProject(entry)
|
||||
}
|
||||
|
||||
@@ -732,22 +719,6 @@ async function tryUpdateFileRefInMongo(entry) {
|
||||
return result.matchedCount === 1
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {QueueEntry} entry
|
||||
* @return {Promise<boolean>}
|
||||
*/
|
||||
async function tryUpdateDeletedFileRefInMongo(entry) {
|
||||
STATS.mongoUpdates++
|
||||
const result = await deletedFilesCollection.updateOne(
|
||||
{
|
||||
_id: new ObjectId(entry.fileId),
|
||||
projectId: entry.ctx.projectId,
|
||||
},
|
||||
{ $set: { hash: entry.hash } }
|
||||
)
|
||||
return result.matchedCount === 1
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {QueueEntry} entry
|
||||
* @return {Promise<boolean>}
|
||||
@@ -812,7 +783,6 @@ async function updateFileRefInMongo(entry) {
|
||||
break
|
||||
}
|
||||
if (!found) {
|
||||
if (await tryUpdateDeletedFileRefInMongo(entry)) return
|
||||
STATS.fileHardDeleted++
|
||||
console.warn('bug: file hard-deleted while processing', projectId, fileId)
|
||||
return
|
||||
@@ -905,49 +875,22 @@ function* findFiles(ctx, folder, path, isInputLoop = false) {
|
||||
/**
|
||||
* @param {Array<Project>} projects
|
||||
* @param {string} prefix
|
||||
* @param {Map<string,Array<DeletedFileRef>>} deletedFiles
|
||||
* @param {Map<string,Array<Blob>>} blobs
|
||||
* @param {Map<string,Array<string>>} backedUpBlobs
|
||||
* @return Generator<QueueEntry>
|
||||
*/
|
||||
function* findFileInBatch(
|
||||
projects,
|
||||
prefix,
|
||||
deletedFiles,
|
||||
blobs,
|
||||
backedUpBlobs
|
||||
) {
|
||||
function* findFileInBatch(projects, prefix, blobs, backedUpBlobs) {
|
||||
for (const project of projects) {
|
||||
const projectIdS = project._id.toString()
|
||||
const historyIdS = project.overleaf.history.id.toString()
|
||||
const projectBlobs = blobs.get(historyIdS) || []
|
||||
const projectBackedUpBlobs = new Set(backedUpBlobs.get(projectIdS) || [])
|
||||
const projectDeletedFiles = deletedFiles.get(projectIdS) || []
|
||||
const ctx = new ProjectContext(
|
||||
project._id,
|
||||
historyIdS,
|
||||
projectBlobs,
|
||||
projectBackedUpBlobs
|
||||
)
|
||||
for (const fileRef of projectDeletedFiles) {
|
||||
const fileId = fileRef._id.toString()
|
||||
if (fileRef.hash) {
|
||||
if (ctx.canSkipProcessingHashedFile(fileRef.hash)) continue
|
||||
ctx.remainingQueueEntries++
|
||||
STATS.filesWithHash++
|
||||
yield {
|
||||
ctx,
|
||||
cacheKey: fileRef.hash,
|
||||
fileId,
|
||||
hash: fileRef.hash,
|
||||
path: MONGO_PATH_SKIP_WRITE_HASH_TO_FILE_TREE,
|
||||
}
|
||||
} else {
|
||||
ctx.remainingQueueEntries++
|
||||
STATS.filesWithoutHash++
|
||||
yield { ctx, cacheKey: fileId, fileId, path: MONGO_PATH_DELETED_FILE }
|
||||
}
|
||||
}
|
||||
for (const blob of projectBlobs) {
|
||||
if (projectBackedUpBlobs.has(blob.getHash())) continue
|
||||
ctx.remainingQueueEntries++
|
||||
@@ -981,41 +924,6 @@ async function collectProjectBlobs(batch) {
|
||||
return await getProjectBlobsBatch(batch.map(p => p.overleaf.history.id))
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Array<Project>} projects
|
||||
* @return {Promise<Map<string, Array<DeletedFileRef>>>}
|
||||
*/
|
||||
async function collectDeletedFiles(projects) {
|
||||
const deletedFiles = new Map()
|
||||
if (!PROCESS_DELETED_FILES) return deletedFiles
|
||||
|
||||
const cursor = deletedFilesCollection.find(
|
||||
{
|
||||
projectId: { $in: projects.map(p => p._id) },
|
||||
...(PROCESS_HASHED_FILES
|
||||
? {}
|
||||
: {
|
||||
hash: { $exists: false },
|
||||
}),
|
||||
},
|
||||
{
|
||||
projection: { _id: 1, projectId: 1, hash: 1 },
|
||||
readPreference: READ_PREFERENCE_SECONDARY,
|
||||
sort: { projectId: 1 },
|
||||
}
|
||||
)
|
||||
for await (const deletedFileRef of cursor) {
|
||||
const projectId = deletedFileRef.projectId.toString()
|
||||
const found = deletedFiles.get(projectId)
|
||||
if (found) {
|
||||
found.push(deletedFileRef)
|
||||
} else {
|
||||
deletedFiles.set(projectId, [deletedFileRef])
|
||||
}
|
||||
}
|
||||
return deletedFiles
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Array<Project>} projects
|
||||
* @return {Promise<{nBackedUpBlobs:number,backedUpBlobs:Map<string,Array<string>>}>}
|
||||
@@ -1043,7 +951,6 @@ async function collectBackedUpBlobs(projects) {
|
||||
const BATCH_HASH_WRITES = 1_000
|
||||
const BATCH_FILE_UPDATES = 100
|
||||
|
||||
const MONGO_PATH_DELETED_FILE = 'deleted-file'
|
||||
const MONGO_PATH_SKIP_WRITE_HASH_TO_FILE_TREE = 'skip-write-to-file-tree'
|
||||
|
||||
class ProjectContext {
|
||||
@@ -1264,9 +1171,7 @@ class ProjectContext {
|
||||
const projectEntries = []
|
||||
const deletedProjectEntries = []
|
||||
for (const entry of this.#pendingFileWrites) {
|
||||
if (entry.path === MONGO_PATH_DELETED_FILE) {
|
||||
individualUpdates.push(entry)
|
||||
} else if (entry.path.startsWith('project.')) {
|
||||
if (entry.path.startsWith('project.')) {
|
||||
deletedProjectEntries.push(entry)
|
||||
} else {
|
||||
projectEntries.push(entry)
|
||||
|
||||
@@ -35,7 +35,6 @@ const { tieringStorageClass } = config.get('backupPersistor')
|
||||
|
||||
const projectsCollection = db.collection('projects')
|
||||
const deletedProjectsCollection = db.collection('deletedProjects')
|
||||
const deletedFilesCollection = db.collection('deletedFiles')
|
||||
|
||||
const FILESTORE_PERSISTOR = ObjectPersistor({
|
||||
backend: 'gcs',
|
||||
@@ -130,11 +129,8 @@ describe('back_fill_file_hash script', function () {
|
||||
const fileId7 = objectIdFromTime('2017-02-01T00:07:00Z')
|
||||
const fileId8 = objectIdFromTime('2017-02-01T00:08:00Z')
|
||||
const fileId9 = objectIdFromTime('2017-02-01T00:09:00Z')
|
||||
const fileIdDeleted1 = objectIdFromTime('2017-03-01T00:01:00Z')
|
||||
const fileIdDeleted2 = objectIdFromTime('2017-03-01T00:02:00Z')
|
||||
const fileIdDeleted3 = objectIdFromTime('2017-03-01T00:03:00Z')
|
||||
const fileIdDeleted4 = objectIdFromTime('2024-03-01T00:04:00Z')
|
||||
const fileIdDeleted5 = objectIdFromTime('2024-03-01T00:05:00Z')
|
||||
const fileId10 = objectIdFromTime('2017-02-01T00:10:00Z')
|
||||
const fileId11 = objectIdFromTime('2017-02-01T00:11:00Z')
|
||||
const contentTextBlob0 = Buffer.from('Hello 0')
|
||||
const hashTextBlob0 = gitBlobHashBuffer(contentTextBlob0)
|
||||
const contentTextBlob1 = Buffer.from('Hello 1')
|
||||
@@ -161,7 +157,6 @@ describe('back_fill_file_hash script', function () {
|
||||
hash: hashFile7,
|
||||
content: contentFile7,
|
||||
},
|
||||
{ projectId: projectId0, historyId: historyId0, fileId: fileIdDeleted5 },
|
||||
{
|
||||
projectId: projectId0,
|
||||
historyId: historyId0,
|
||||
@@ -181,7 +176,6 @@ describe('back_fill_file_hash script', function () {
|
||||
content: contentTextBlob2,
|
||||
},
|
||||
{ projectId: projectId1, historyId: historyId1, fileId: fileId1 },
|
||||
{ projectId: projectId1, historyId: historyId1, fileId: fileIdDeleted1 },
|
||||
{
|
||||
projectId: projectId2,
|
||||
historyId: historyId2,
|
||||
@@ -189,23 +183,28 @@ describe('back_fill_file_hash script', function () {
|
||||
hasHash: true,
|
||||
},
|
||||
{ projectId: projectId3, historyId: historyId3, fileId: fileId3 },
|
||||
// fileId10 is dupe of fileId3, without a hash
|
||||
{
|
||||
projectId: projectId3,
|
||||
historyId: historyId3,
|
||||
fileId: fileId10,
|
||||
content: Buffer.from(fileId3.toString()),
|
||||
hash: gitBlobHash(fileId3),
|
||||
},
|
||||
// fileId11 is dupe of fileId3, but with a hash
|
||||
{
|
||||
projectId: projectId3,
|
||||
historyId: historyId3,
|
||||
fileId: fileId11,
|
||||
content: Buffer.from(fileId3.toString()),
|
||||
hash: gitBlobHash(fileId3),
|
||||
hasHash: true,
|
||||
},
|
||||
{
|
||||
projectId: projectIdDeleted0,
|
||||
historyId: historyIdDeleted0,
|
||||
fileId: fileId4,
|
||||
},
|
||||
{
|
||||
projectId: projectIdDeleted0,
|
||||
historyId: historyIdDeleted0,
|
||||
fileId: fileIdDeleted2,
|
||||
},
|
||||
// { historyId: historyIdDeleted0, fileId:fileIdDeleted3 }, // fileIdDeleted3 is dupe of fileIdDeleted2
|
||||
{
|
||||
projectId: projectIdDeleted0,
|
||||
historyId: historyIdDeleted0,
|
||||
fileId: fileIdDeleted4,
|
||||
hasHash: true,
|
||||
},
|
||||
{
|
||||
projectId: projectIdDeleted1,
|
||||
historyId: historyIdDeleted1,
|
||||
@@ -233,10 +232,6 @@ describe('back_fill_file_hash script', function () {
|
||||
fileId4,
|
||||
fileId5,
|
||||
fileId6,
|
||||
fileIdDeleted1,
|
||||
fileIdDeleted2,
|
||||
fileIdDeleted3,
|
||||
fileIdDeleted4,
|
||||
}
|
||||
console.log({
|
||||
projectId0,
|
||||
@@ -328,7 +323,11 @@ describe('back_fill_file_hash script', function () {
|
||||
fileRefs: [],
|
||||
folders: [
|
||||
{
|
||||
fileRefs: [{ _id: fileId3 }],
|
||||
fileRefs: [
|
||||
{ _id: fileId3 },
|
||||
{ _id: fileId10 },
|
||||
{ _id: fileId11, hash: gitBlobHash(fileId3) },
|
||||
],
|
||||
folders: [],
|
||||
},
|
||||
],
|
||||
@@ -446,17 +445,6 @@ describe('back_fill_file_hash script', function () {
|
||||
},
|
||||
},
|
||||
])
|
||||
await deletedFilesCollection.insertMany([
|
||||
{ _id: fileIdDeleted1, projectId: projectId1 },
|
||||
{ _id: fileIdDeleted2, projectId: projectIdDeleted0 },
|
||||
{ _id: fileIdDeleted3, projectId: projectIdDeleted0 },
|
||||
{
|
||||
_id: fileIdDeleted4,
|
||||
projectId: projectIdDeleted0,
|
||||
hash: gitBlobHash(fileIdDeleted4),
|
||||
},
|
||||
{ _id: fileIdDeleted5, projectId: projectId0 },
|
||||
])
|
||||
}
|
||||
|
||||
async function populateHistoryV1() {
|
||||
@@ -499,11 +487,6 @@ describe('back_fill_file_hash script', function () {
|
||||
`${projectId0}/${fileId7}`,
|
||||
Stream.Readable.from([contentFile7])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectId0}/${fileIdDeleted5}`,
|
||||
Stream.Readable.from([fileIdDeleted5.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectId1}/${fileId1}`,
|
||||
@@ -519,6 +502,18 @@ describe('back_fill_file_hash script', function () {
|
||||
`${projectId3}/${fileId3}`,
|
||||
Stream.Readable.from([fileId3.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectId3}/${fileId10}`,
|
||||
// fileId10 is dupe of fileId3
|
||||
Stream.Readable.from([fileId3.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectId3}/${fileId11}`,
|
||||
// fileId11 is dupe of fileId3
|
||||
Stream.Readable.from([fileId3.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectIdDeleted0}/${fileId4}`,
|
||||
@@ -529,27 +524,6 @@ describe('back_fill_file_hash script', function () {
|
||||
`${projectIdDeleted1}/${fileId5}`,
|
||||
Stream.Readable.from([fileId5.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectId1}/${fileIdDeleted1}`,
|
||||
Stream.Readable.from([fileIdDeleted1.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectIdDeleted0}/${fileIdDeleted2}`,
|
||||
Stream.Readable.from([fileIdDeleted2.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectIdDeleted0}/${fileIdDeleted3}`,
|
||||
// same content as 2, deduplicate
|
||||
Stream.Readable.from([fileIdDeleted2.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectIdDeleted0}/${fileIdDeleted4}`,
|
||||
Stream.Readable.from([fileIdDeleted4.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectIdBadFileTree3}/${fileId9}`,
|
||||
@@ -579,7 +553,6 @@ describe('back_fill_file_hash script', function () {
|
||||
'storage/scripts/back_fill_file_hash.mjs',
|
||||
'--processNonDeletedProjects=true',
|
||||
'--processDeletedProjects=true',
|
||||
'--processDeletedFiles=true',
|
||||
...args,
|
||||
],
|
||||
{
|
||||
@@ -741,6 +714,8 @@ describe('back_fill_file_hash script', function () {
|
||||
{
|
||||
fileRefs: [
|
||||
{ _id: fileId3, hash: gitBlobHash(fileId3) },
|
||||
{ _id: fileId10, hash: gitBlobHash(fileId3) },
|
||||
{ _id: fileId11, hash: gitBlobHash(fileId3) },
|
||||
],
|
||||
folders: [],
|
||||
},
|
||||
@@ -868,34 +843,6 @@ describe('back_fill_file_hash script', function () {
|
||||
},
|
||||
},
|
||||
])
|
||||
expect(await deletedFilesCollection.find({}).toArray()).to.deep.equal([
|
||||
{
|
||||
_id: fileIdDeleted1,
|
||||
projectId: projectId1,
|
||||
hash: gitBlobHash(fileIdDeleted1),
|
||||
},
|
||||
{
|
||||
_id: fileIdDeleted2,
|
||||
projectId: projectIdDeleted0,
|
||||
hash: gitBlobHash(fileIdDeleted2),
|
||||
},
|
||||
{
|
||||
_id: fileIdDeleted3,
|
||||
projectId: projectIdDeleted0,
|
||||
// uses the same content as fileIdDeleted2
|
||||
hash: gitBlobHash(fileIdDeleted2),
|
||||
},
|
||||
{
|
||||
_id: fileIdDeleted4,
|
||||
projectId: projectIdDeleted0,
|
||||
hash: gitBlobHash(fileIdDeleted4),
|
||||
},
|
||||
{
|
||||
_id: fileIdDeleted5,
|
||||
projectId: projectId0,
|
||||
hash: gitBlobHash(fileIdDeleted5),
|
||||
},
|
||||
])
|
||||
expect(
|
||||
(await backedUpBlobs.find({}, { sort: { _id: 1 } }).toArray()).map(
|
||||
entry => {
|
||||
@@ -910,7 +857,6 @@ describe('back_fill_file_hash script', function () {
|
||||
blobs: [
|
||||
binaryForGitBlobHash(gitBlobHash(fileId0)),
|
||||
binaryForGitBlobHash(hashFile7),
|
||||
binaryForGitBlobHash(gitBlobHash(fileIdDeleted5)),
|
||||
binaryForGitBlobHash(hashTextBlob0),
|
||||
].sort(),
|
||||
},
|
||||
@@ -918,7 +864,6 @@ describe('back_fill_file_hash script', function () {
|
||||
_id: projectId1,
|
||||
blobs: [
|
||||
binaryForGitBlobHash(gitBlobHash(fileId1)),
|
||||
binaryForGitBlobHash(gitBlobHash(fileIdDeleted1)),
|
||||
binaryForGitBlobHash(hashTextBlob1),
|
||||
].sort(),
|
||||
},
|
||||
@@ -934,16 +879,7 @@ describe('back_fill_file_hash script', function () {
|
||||
},
|
||||
{
|
||||
_id: projectIdDeleted0,
|
||||
blobs: [
|
||||
binaryForGitBlobHash(gitBlobHash(fileId4)),
|
||||
binaryForGitBlobHash(gitBlobHash(fileIdDeleted2)),
|
||||
]
|
||||
.concat(
|
||||
processHashedFiles
|
||||
? [binaryForGitBlobHash(gitBlobHash(fileIdDeleted4))]
|
||||
: []
|
||||
)
|
||||
.sort(),
|
||||
blobs: [binaryForGitBlobHash(gitBlobHash(fileId4))].sort(),
|
||||
},
|
||||
{
|
||||
_id: projectId3,
|
||||
@@ -971,11 +907,15 @@ describe('back_fill_file_hash script', function () {
|
||||
expect(tieringStorageClass).to.exist
|
||||
const blobs = await listS3Bucket(projectBlobsBucket, tieringStorageClass)
|
||||
expect(blobs.sort()).to.deep.equal(
|
||||
writtenBlobs
|
||||
.map(({ historyId, fileId, hash }) =>
|
||||
makeProjectKey(historyId, hash || gitBlobHash(fileId))
|
||||
Array.from(
|
||||
new Set(
|
||||
writtenBlobs
|
||||
.map(({ historyId, fileId, hash }) =>
|
||||
makeProjectKey(historyId, hash || gitBlobHash(fileId))
|
||||
)
|
||||
.sort()
|
||||
)
|
||||
.sort()
|
||||
)
|
||||
)
|
||||
for (let { historyId, fileId, hash, content } of writtenBlobs) {
|
||||
hash = hash || gitBlobHash(fileId.toString())
|
||||
@@ -1037,15 +977,15 @@ describe('back_fill_file_hash script', function () {
|
||||
...STATS_ALL_ZERO,
|
||||
// We still need to iterate over all the projects and blobs.
|
||||
projects: 10,
|
||||
blobs: 13,
|
||||
backedUpBlobs: 13,
|
||||
blobs: 10,
|
||||
backedUpBlobs: 10,
|
||||
badFileTrees: 4,
|
||||
}
|
||||
if (processHashedFiles) {
|
||||
stats = sumStats(stats, {
|
||||
...STATS_ALL_ZERO,
|
||||
blobs: 3,
|
||||
backedUpBlobs: 3,
|
||||
blobs: 2,
|
||||
backedUpBlobs: 2,
|
||||
})
|
||||
}
|
||||
expect(rerun.stats).deep.equal(stats)
|
||||
@@ -1101,7 +1041,7 @@ describe('back_fill_file_hash script', function () {
|
||||
blobs: 2,
|
||||
backedUpBlobs: 0,
|
||||
filesWithHash: 0,
|
||||
filesWithoutHash: 7,
|
||||
filesWithoutHash: 5,
|
||||
filesDuplicated: 1,
|
||||
filesRetries: 0,
|
||||
filesFailed: 0,
|
||||
@@ -1112,24 +1052,24 @@ describe('back_fill_file_hash script', function () {
|
||||
projectHardDeleted: 0,
|
||||
fileHardDeleted: 0,
|
||||
badFileTrees: 0,
|
||||
mongoUpdates: 6,
|
||||
mongoUpdates: 4,
|
||||
deduplicatedWriteToAWSLocalCount: 0,
|
||||
deduplicatedWriteToAWSLocalEgress: 0,
|
||||
deduplicatedWriteToAWSRemoteCount: 0,
|
||||
deduplicatedWriteToAWSRemoteEgress: 0,
|
||||
readFromGCSCount: 8,
|
||||
readFromGCSIngress: 4000134,
|
||||
writeToAWSCount: 7,
|
||||
writeToAWSEgress: 4086,
|
||||
writeToGCSCount: 5,
|
||||
writeToGCSEgress: 4000096,
|
||||
readFromGCSCount: 6,
|
||||
readFromGCSIngress: 4000086,
|
||||
writeToAWSCount: 5,
|
||||
writeToAWSEgress: 4026,
|
||||
writeToGCSCount: 3,
|
||||
writeToGCSEgress: 4000048,
|
||||
}
|
||||
const STATS_UP_FROM_PROJECT1_ONWARD = {
|
||||
projects: 8,
|
||||
blobs: 2,
|
||||
backedUpBlobs: 0,
|
||||
filesWithHash: 0,
|
||||
filesWithoutHash: 5,
|
||||
filesWithoutHash: 4,
|
||||
filesDuplicated: 0,
|
||||
filesRetries: 0,
|
||||
filesFailed: 0,
|
||||
@@ -1140,28 +1080,28 @@ describe('back_fill_file_hash script', function () {
|
||||
projectHardDeleted: 0,
|
||||
fileHardDeleted: 0,
|
||||
badFileTrees: 4,
|
||||
mongoUpdates: 10,
|
||||
mongoUpdates: 8,
|
||||
deduplicatedWriteToAWSLocalCount: 1,
|
||||
deduplicatedWriteToAWSLocalEgress: 30,
|
||||
deduplicatedWriteToAWSRemoteCount: 0,
|
||||
deduplicatedWriteToAWSRemoteEgress: 0,
|
||||
readFromGCSCount: 7,
|
||||
readFromGCSIngress: 134,
|
||||
writeToAWSCount: 6,
|
||||
writeToAWSEgress: 173,
|
||||
writeToGCSCount: 4,
|
||||
writeToGCSEgress: 96,
|
||||
readFromGCSCount: 6,
|
||||
readFromGCSIngress: 110,
|
||||
writeToAWSCount: 5,
|
||||
writeToAWSEgress: 143,
|
||||
writeToGCSCount: 3,
|
||||
writeToGCSEgress: 72,
|
||||
}
|
||||
const STATS_FILES_HASHED_EXTRA = {
|
||||
...STATS_ALL_ZERO,
|
||||
filesWithHash: 3,
|
||||
mongoUpdates: 1,
|
||||
readFromGCSCount: 3,
|
||||
readFromGCSIngress: 72,
|
||||
writeToAWSCount: 3,
|
||||
writeToAWSEgress: 89,
|
||||
writeToGCSCount: 3,
|
||||
writeToGCSEgress: 72,
|
||||
filesWithHash: 2,
|
||||
mongoUpdates: 2,
|
||||
readFromGCSCount: 2,
|
||||
readFromGCSIngress: 48,
|
||||
writeToAWSCount: 2,
|
||||
writeToAWSEgress: 60,
|
||||
writeToGCSCount: 2,
|
||||
writeToGCSEgress: 48,
|
||||
}
|
||||
|
||||
function sumStats(a, b) {
|
||||
@@ -1331,10 +1271,9 @@ describe('back_fill_file_hash script', function () {
|
||||
expect(output2.stats).deep.equal({
|
||||
...STATS_FILES_HASHED_EXTRA,
|
||||
projects: 10,
|
||||
blobs: 13,
|
||||
backedUpBlobs: 13,
|
||||
blobs: 10,
|
||||
backedUpBlobs: 10,
|
||||
badFileTrees: 4,
|
||||
mongoUpdates: 3,
|
||||
})
|
||||
})
|
||||
commonAssertions(true)
|
||||
@@ -1376,7 +1315,15 @@ describe('back_fill_file_hash script', function () {
|
||||
})
|
||||
it('should print stats', function () {
|
||||
expect(output.stats).deep.equal(
|
||||
sumStats(STATS_ALL, STATS_FILES_HASHED_EXTRA)
|
||||
sumStats(STATS_ALL, {
|
||||
...STATS_FILES_HASHED_EXTRA,
|
||||
readFromGCSCount: 3,
|
||||
readFromGCSIngress: 72,
|
||||
deduplicatedWriteToAWSLocalCount: 1,
|
||||
deduplicatedWriteToAWSLocalEgress: 30,
|
||||
mongoUpdates: 1,
|
||||
filesWithHash: 3,
|
||||
})
|
||||
)
|
||||
})
|
||||
commonAssertions(true)
|
||||
|
||||
@@ -17,7 +17,6 @@ const MONGO_COLLECTIONS = [
|
||||
'projectHistoryChunks',
|
||||
|
||||
// back_fill_file_hash.test.mjs
|
||||
'deletedFiles',
|
||||
'deletedProjects',
|
||||
'projects',
|
||||
'projectHistoryBackedUpBlobs',
|
||||
|
||||
@@ -324,19 +324,6 @@ async function undeleteProject(projectId, options = {}) {
|
||||
})
|
||||
restored.deletedDocs = []
|
||||
}
|
||||
if (restored.deletedFiles && restored.deletedFiles.length > 0) {
|
||||
filterDuplicateDeletedFilesInPlace(restored)
|
||||
const deletedFiles = restored.deletedFiles.map(file => {
|
||||
// break free from the model
|
||||
file = file.toObject()
|
||||
|
||||
// add projectId
|
||||
file.projectId = projectId
|
||||
return file
|
||||
})
|
||||
await db.deletedFiles.insertMany(deletedFiles)
|
||||
restored.deletedFiles = []
|
||||
}
|
||||
|
||||
// we can't use Mongoose to re-insert the project, as it won't
|
||||
// create a new document with an _id already specified. We need to
|
||||
@@ -388,7 +375,6 @@ async function expireDeletedProject(projectId) {
|
||||
),
|
||||
FilestoreHandler.promises.deleteProject(deletedProject.project._id),
|
||||
ChatApiHandler.promises.destroyProject(deletedProject.project._id),
|
||||
hardDeleteDeletedFiles(deletedProject.project._id),
|
||||
ProjectAuditLogEntry.deleteMany({ projectId }),
|
||||
Modules.promises.hooks.fire('projectExpired', deletedProject.project._id),
|
||||
])
|
||||
@@ -409,31 +395,3 @@ async function expireDeletedProject(projectId) {
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
function filterDuplicateDeletedFilesInPlace(project) {
|
||||
const fileIds = new Set()
|
||||
project.deletedFiles = project.deletedFiles.filter(file => {
|
||||
const id = file._id.toString()
|
||||
if (fileIds.has(id)) return false
|
||||
fileIds.add(id)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
let deletedFilesProjectIdIndexExist
|
||||
async function doesDeletedFilesProjectIdIndexExist() {
|
||||
if (typeof deletedFilesProjectIdIndexExist !== 'boolean') {
|
||||
// Resolve this about once. No need for locking or retry handling.
|
||||
deletedFilesProjectIdIndexExist =
|
||||
await db.deletedFiles.indexExists('projectId_1')
|
||||
}
|
||||
return deletedFilesProjectIdIndexExist
|
||||
}
|
||||
|
||||
async function hardDeleteDeletedFiles(projectId) {
|
||||
if (!(await doesDeletedFilesProjectIdIndexExist())) {
|
||||
// Running the deletion command w/o index would kill mongo performance
|
||||
return
|
||||
}
|
||||
return db.deletedFiles.deleteMany({ projectId })
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@ const ProjectGetter = require('./ProjectGetter')
|
||||
const ProjectLocator = require('./ProjectLocator')
|
||||
const FolderStructureBuilder = require('./FolderStructureBuilder')
|
||||
const SafePath = require('./SafePath')
|
||||
const { DeletedFile } = require('../../models/DeletedFile')
|
||||
const { iterablePaths } = require('./IterablePath')
|
||||
|
||||
const LOCK_NAMESPACE = 'mongoTransaction'
|
||||
@@ -72,7 +71,6 @@ module.exports = {
|
||||
'changes',
|
||||
]),
|
||||
createNewFolderStructure: callbackify(wrapWithLock(createNewFolderStructure)),
|
||||
_insertDeletedFileReference: callbackify(_insertDeletedFileReference),
|
||||
_putElement: callbackifyMultiResult(_putElement, ['result', 'project']),
|
||||
_confirmFolder,
|
||||
promises: {
|
||||
@@ -87,7 +85,6 @@ module.exports = {
|
||||
deleteEntity: wrapWithLock(deleteEntity),
|
||||
renameEntity: wrapWithLock(renameEntity),
|
||||
createNewFolderStructure: wrapWithLock(createNewFolderStructure),
|
||||
_insertDeletedFileReference,
|
||||
_putElement,
|
||||
},
|
||||
}
|
||||
@@ -162,7 +159,6 @@ async function replaceFileWithNew(projectId, fileId, newFileRef, userId) {
|
||||
element_id: fileId,
|
||||
type: 'file',
|
||||
})
|
||||
await _insertDeletedFileReference(projectId, fileRef)
|
||||
const newProject = await Project.findOneAndUpdate(
|
||||
{ _id: project._id, [path.mongo]: { $exists: true } },
|
||||
{
|
||||
@@ -480,17 +476,6 @@ async function renameEntity(projectId, entityId, entityType, newName, userId) {
|
||||
}
|
||||
}
|
||||
|
||||
async function _insertDeletedFileReference(projectId, fileRef) {
|
||||
await DeletedFile.create({
|
||||
projectId,
|
||||
_id: fileRef._id,
|
||||
name: fileRef.name,
|
||||
linkedFileData: fileRef.linkedFileData,
|
||||
hash: fileRef.hash,
|
||||
deletedAt: new Date(),
|
||||
})
|
||||
}
|
||||
|
||||
async function _removeElementFromMongoArray(
|
||||
modelId,
|
||||
path,
|
||||
|
||||
@@ -1627,8 +1627,6 @@ const ProjectEntityUpdateHandler = {
|
||||
entry.path,
|
||||
userId
|
||||
)
|
||||
} else if (entry.type === 'file') {
|
||||
await ProjectEntityUpdateHandler._cleanUpFile(project, entry.entity)
|
||||
}
|
||||
}
|
||||
return subtreeListing
|
||||
@@ -1679,13 +1677,6 @@ const ProjectEntityUpdateHandler = {
|
||||
|
||||
return await DocumentUpdaterHandler.promises.deleteDoc(projectId, docId)
|
||||
},
|
||||
|
||||
async _cleanUpFile(project, file) {
|
||||
return await ProjectEntityMongoUpdateHandler.promises._insertDeletedFileReference(
|
||||
project._id,
|
||||
file
|
||||
)
|
||||
},
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -33,7 +33,6 @@ addConnectionDrainer('mongodb', async () => {
|
||||
const internalDb = mongoClient.db()
|
||||
const db = {
|
||||
contacts: internalDb.collection('contacts'),
|
||||
deletedFiles: internalDb.collection('deletedFiles'),
|
||||
deletedProjects: internalDb.collection('deletedProjects'),
|
||||
deletedSubscriptions: internalDb.collection('deletedSubscriptions'),
|
||||
deletedUsers: internalDb.collection('deletedUsers'),
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
const mongoose = require('../infrastructure/Mongoose')
|
||||
const { Schema } = mongoose
|
||||
|
||||
const DeletedFileSchema = new Schema(
|
||||
{
|
||||
name: String,
|
||||
projectId: Schema.ObjectId,
|
||||
created: {
|
||||
type: Date,
|
||||
},
|
||||
linkedFileData: { type: Schema.Types.Mixed },
|
||||
hash: {
|
||||
type: String,
|
||||
},
|
||||
deletedAt: { type: Date },
|
||||
},
|
||||
{ collection: 'deletedFiles', minimize: false }
|
||||
)
|
||||
|
||||
exports.DeletedFile = mongoose.model('DeletedFile', DeletedFileSchema)
|
||||
exports.DeletedFileSchema = DeletedFileSchema
|
||||
@@ -12,18 +12,6 @@ const DeletedDocSchema = new Schema({
|
||||
deletedAt: { type: Date },
|
||||
})
|
||||
|
||||
const DeletedFileSchema = new Schema({
|
||||
name: String,
|
||||
created: {
|
||||
type: Date,
|
||||
},
|
||||
linkedFileData: { type: Schema.Types.Mixed },
|
||||
hash: {
|
||||
type: String,
|
||||
},
|
||||
deletedAt: { type: Date },
|
||||
})
|
||||
|
||||
const ProjectSchema = new Schema(
|
||||
{
|
||||
name: { type: String, default: 'new project' },
|
||||
@@ -54,7 +42,6 @@ const ProjectSchema = new Schema(
|
||||
archived: { type: Schema.Types.Mixed },
|
||||
trashed: [{ type: ObjectId, ref: 'User' }],
|
||||
deletedDocs: [DeletedDocSchema],
|
||||
deletedFiles: [DeletedFileSchema],
|
||||
imageName: { type: String },
|
||||
brandVariationId: { type: String },
|
||||
track_changes: { type: Object },
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import Helpers from './lib/helpers.mjs'
|
||||
import { getCollectionInternal } from '../app/src/infrastructure/mongodb.js'
|
||||
|
||||
const tags = ['server-ce', 'server-pro', 'saas']
|
||||
|
||||
@@ -11,14 +12,17 @@ const indexes = [
|
||||
},
|
||||
]
|
||||
|
||||
const migrate = async client => {
|
||||
const { db } = client
|
||||
await Helpers.addIndexesToCollection(db.deletedFiles, indexes)
|
||||
async function getCollection() {
|
||||
// NOTE: The deletedFiles collection is not available to the application as it has been retired. Fetch it here.
|
||||
return await getCollectionInternal('deletedFiles')
|
||||
}
|
||||
|
||||
const rollback = async client => {
|
||||
const { db } = client
|
||||
await Helpers.dropIndexesFromCollection(db.deletedFiles, indexes)
|
||||
const migrate = async () => {
|
||||
await Helpers.addIndexesToCollection(await getCollection(), indexes)
|
||||
}
|
||||
|
||||
const rollback = async () => {
|
||||
await Helpers.dropIndexesFromCollection(await getCollection(), indexes)
|
||||
}
|
||||
|
||||
export default {
|
||||
|
||||
@@ -1,14 +1,8 @@
|
||||
import runScript from '../scripts/back_fill_deleted_files.mjs'
|
||||
|
||||
const tags = ['server-ce', 'server-pro', 'saas']
|
||||
|
||||
const migrate = async client => {
|
||||
const options = {
|
||||
performCleanup: true,
|
||||
letUserDoubleCheckInputsFor: 10,
|
||||
fixPartialInserts: true,
|
||||
}
|
||||
await runScript(options)
|
||||
// Skip back-filling. The deletedFiles collection will be deleted in a following migration.
|
||||
// The projects.deletedFiles array will be purged as part of the later migration as well.
|
||||
}
|
||||
|
||||
const rollback = async client => {}
|
||||
|
||||
50
services/web/migrations/20250519101127_drop_deletedFiles.mjs
Normal file
50
services/web/migrations/20250519101127_drop_deletedFiles.mjs
Normal file
@@ -0,0 +1,50 @@
|
||||
import Helpers from './lib/helpers.mjs'
|
||||
import { getCollectionInternal, db } from '../app/src/infrastructure/mongodb.js'
|
||||
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
|
||||
|
||||
const tags = ['server-ce', 'server-pro', 'saas']
|
||||
|
||||
const indexes = [
|
||||
{
|
||||
key: {
|
||||
projectId: 1,
|
||||
},
|
||||
name: 'projectId_1',
|
||||
},
|
||||
]
|
||||
|
||||
async function getCollection() {
|
||||
// NOTE: The deletedFiles collection is not available to the application as it has been retired. Fetch it here.
|
||||
return await getCollectionInternal('deletedFiles')
|
||||
}
|
||||
|
||||
const migrate = async () => {
|
||||
const collection = await getCollection()
|
||||
|
||||
// Purge legacy deletedFiles array from project records.
|
||||
await batchedUpdate(
|
||||
db.projects,
|
||||
{ deletedFiles: { $exists: true } },
|
||||
{ $unset: { deletedFiles: 1 } }
|
||||
)
|
||||
|
||||
// Purge legacy deletedFiles array from soft-deleted project records.
|
||||
await batchedUpdate(
|
||||
db.deletedProjects,
|
||||
{ 'project.deletedFiles': { $exists: true } },
|
||||
{ $unset: { 'project.deletedFiles': 1 } }
|
||||
)
|
||||
|
||||
// Drop historic deletedFiles records
|
||||
await collection.drop()
|
||||
}
|
||||
|
||||
const rollback = async () => {
|
||||
await Helpers.addIndexesToCollection(await getCollection(), indexes)
|
||||
}
|
||||
|
||||
export default {
|
||||
tags,
|
||||
migrate,
|
||||
rollback,
|
||||
}
|
||||
@@ -1,133 +0,0 @@
|
||||
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
|
||||
import { promiseMapWithLimit, promisify } from '@overleaf/promise-utils'
|
||||
import { db } from '../app/src/infrastructure/mongodb.js'
|
||||
import _ from 'lodash'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
|
||||
const sleep = promisify(setTimeout)
|
||||
|
||||
async function main(options) {
|
||||
if (!options) {
|
||||
options = {}
|
||||
}
|
||||
_.defaults(options, {
|
||||
writeConcurrency: parseInt(process.env.WRITE_CONCURRENCY, 10) || 10,
|
||||
performCleanup: process.argv.includes('--perform-cleanup'),
|
||||
fixPartialInserts: process.argv.includes('--fix-partial-inserts'),
|
||||
letUserDoubleCheckInputsFor: parseInt(
|
||||
process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
|
||||
10
|
||||
),
|
||||
})
|
||||
|
||||
await letUserDoubleCheckInputs(options)
|
||||
|
||||
await batchedUpdate(
|
||||
db.projects,
|
||||
// array is not empty ~ array has one item
|
||||
{ 'deletedFiles.0': { $exists: true } },
|
||||
async projects => {
|
||||
await processBatch(projects, options)
|
||||
},
|
||||
{ _id: 1, deletedFiles: 1 }
|
||||
)
|
||||
}
|
||||
|
||||
async function processBatch(projects, options) {
|
||||
await promiseMapWithLimit(
|
||||
options.writeConcurrency,
|
||||
projects,
|
||||
async project => {
|
||||
await processProject(project, options)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async function processProject(project, options) {
|
||||
await backFillFiles(project, options)
|
||||
|
||||
if (options.performCleanup) {
|
||||
await cleanupProject(project)
|
||||
}
|
||||
}
|
||||
|
||||
async function backFillFiles(project, options) {
|
||||
const projectId = project._id
|
||||
filterDuplicatesInPlace(project)
|
||||
project.deletedFiles.forEach(file => {
|
||||
file.projectId = projectId
|
||||
})
|
||||
|
||||
if (options.fixPartialInserts) {
|
||||
await fixPartialInserts(project)
|
||||
} else {
|
||||
await db.deletedFiles.insertMany(project.deletedFiles)
|
||||
}
|
||||
}
|
||||
|
||||
function filterDuplicatesInPlace(project) {
|
||||
const fileIds = new Set()
|
||||
project.deletedFiles = project.deletedFiles.filter(file => {
|
||||
const id = file._id.toString()
|
||||
if (fileIds.has(id)) return false
|
||||
fileIds.add(id)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
async function fixPartialInserts(project) {
|
||||
const seenFileIds = new Set(
|
||||
(
|
||||
await db.deletedFiles
|
||||
.find(
|
||||
{ _id: { $in: project.deletedFiles.map(file => file._id) } },
|
||||
{ projection: { _id: 1 } }
|
||||
)
|
||||
.toArray()
|
||||
).map(file => file._id.toString())
|
||||
)
|
||||
project.deletedFiles = project.deletedFiles.filter(file => {
|
||||
const id = file._id.toString()
|
||||
if (seenFileIds.has(id)) return false
|
||||
seenFileIds.add(id)
|
||||
return true
|
||||
})
|
||||
if (project.deletedFiles.length > 0) {
|
||||
await db.deletedFiles.insertMany(project.deletedFiles)
|
||||
}
|
||||
}
|
||||
|
||||
async function cleanupProject(project) {
|
||||
await db.projects.updateOne(
|
||||
{ _id: project._id },
|
||||
{ $set: { deletedFiles: [] } }
|
||||
)
|
||||
}
|
||||
|
||||
async function letUserDoubleCheckInputs(options) {
|
||||
if (options.performCleanup) {
|
||||
console.error('BACK FILLING AND PERFORMING CLEANUP')
|
||||
} else {
|
||||
console.error(
|
||||
'BACK FILLING ONLY - You will need to rerun with --perform-cleanup'
|
||||
)
|
||||
}
|
||||
console.error(
|
||||
'Waiting for you to double check inputs for',
|
||||
options.letUserDoubleCheckInputsFor,
|
||||
'ms'
|
||||
)
|
||||
await sleep(options.letUserDoubleCheckInputsFor)
|
||||
}
|
||||
|
||||
export default main
|
||||
|
||||
if (fileURLToPath(import.meta.url) === process.argv[1]) {
|
||||
try {
|
||||
await main()
|
||||
process.exit(0)
|
||||
} catch (error) {
|
||||
console.error({ error })
|
||||
process.exit(1)
|
||||
}
|
||||
}
|
||||
@@ -19,7 +19,6 @@ async function countFiles() {
|
||||
console.error(
|
||||
projectId,
|
||||
files.length,
|
||||
(project.deletedFiles && project.deletedFiles.length) || 0,
|
||||
docs.length,
|
||||
(project.deletedDocs && project.deletedDocs.length) || 0
|
||||
)
|
||||
|
||||
@@ -43,7 +43,6 @@ async function countProjectFiles() {
|
||||
console.error(
|
||||
projectId,
|
||||
files.length,
|
||||
(project.deletedFiles && project.deletedFiles.length) || 0,
|
||||
docs.length,
|
||||
(project.deletedDocs && project.deletedDocs.length) || 0,
|
||||
fileSize,
|
||||
|
||||
@@ -1,176 +0,0 @@
|
||||
import { exec } from 'node:child_process'
|
||||
import { promisify } from 'node:util'
|
||||
import { expect } from 'chai'
|
||||
import logger from '@overleaf/logger'
|
||||
import { db, ObjectId } from '../../../app/src/infrastructure/mongodb.js'
|
||||
import UserHelper from './helpers/User.mjs'
|
||||
|
||||
const User = UserHelper.promises
|
||||
|
||||
async function getDeletedFiles(projectId) {
|
||||
return (await db.projects.findOne({ _id: projectId })).deletedFiles
|
||||
}
|
||||
|
||||
async function setDeletedFiles(projectId, deletedFiles) {
|
||||
await db.projects.updateOne({ _id: projectId }, { $set: { deletedFiles } })
|
||||
}
|
||||
|
||||
async function unsetDeletedFiles(projectId) {
|
||||
await db.projects.updateOne(
|
||||
{ _id: projectId },
|
||||
{ $unset: { deletedFiles: 1 } }
|
||||
)
|
||||
}
|
||||
|
||||
describe('BackFillDeletedFiles', function () {
|
||||
let user, projectId1, projectId2, projectId3, projectId4, projectId5
|
||||
|
||||
beforeEach('create projects', async function () {
|
||||
user = new User()
|
||||
await user.login()
|
||||
|
||||
projectId1 = new ObjectId(await user.createProject('project1'))
|
||||
projectId2 = new ObjectId(await user.createProject('project2'))
|
||||
projectId3 = new ObjectId(await user.createProject('project3'))
|
||||
projectId4 = new ObjectId(await user.createProject('project4'))
|
||||
projectId5 = new ObjectId(await user.createProject('project5'))
|
||||
})
|
||||
|
||||
let fileId1, fileId2, fileId3, fileId4
|
||||
beforeEach('create files', function () {
|
||||
// take a short cut and just allocate file ids
|
||||
fileId1 = new ObjectId()
|
||||
fileId2 = new ObjectId()
|
||||
fileId3 = new ObjectId()
|
||||
fileId4 = new ObjectId()
|
||||
})
|
||||
const otherFileDetails = {
|
||||
name: 'universe.jpg',
|
||||
linkedFileData: null,
|
||||
hash: 'ed19e7d6779b47d8c63f6fa5a21954dcfb6cac00',
|
||||
deletedAt: new Date(),
|
||||
__v: 0,
|
||||
}
|
||||
let deletedFiles1, deletedFiles2, deletedFiles3
|
||||
beforeEach('set deletedFiles details', async function () {
|
||||
deletedFiles1 = [
|
||||
{ _id: fileId1, ...otherFileDetails },
|
||||
{ _id: fileId2, ...otherFileDetails },
|
||||
]
|
||||
deletedFiles2 = [{ _id: fileId3, ...otherFileDetails }]
|
||||
await setDeletedFiles(projectId1, deletedFiles1)
|
||||
await setDeletedFiles(projectId2, deletedFiles2)
|
||||
|
||||
// a project without deletedFiles entries
|
||||
await setDeletedFiles(projectId3, [])
|
||||
// a project without deletedFiles array
|
||||
await unsetDeletedFiles(projectId4)
|
||||
// duplicate entry
|
||||
deletedFiles3 = [
|
||||
{ _id: fileId4, ...otherFileDetails },
|
||||
{ _id: fileId4, ...otherFileDetails },
|
||||
]
|
||||
await setDeletedFiles(projectId5, deletedFiles3)
|
||||
})
|
||||
|
||||
async function runScript(args = []) {
|
||||
let result
|
||||
try {
|
||||
result = await promisify(exec)(
|
||||
['LET_USER_DOUBLE_CHECK_INPUTS_FOR=1', 'VERBOSE_LOGGING=true']
|
||||
.concat(['node', 'scripts/back_fill_deleted_files.mjs'])
|
||||
.concat(args)
|
||||
.join(' ')
|
||||
)
|
||||
} catch (error) {
|
||||
// dump details like exit code, stdErr and stdOut
|
||||
logger.error({ error }, 'script failed')
|
||||
throw error
|
||||
}
|
||||
const { stdout: stdOut } = result
|
||||
|
||||
expect(stdOut).to.match(
|
||||
new RegExp(`Running update on batch with ids .+${projectId1}`)
|
||||
)
|
||||
expect(stdOut).to.match(
|
||||
new RegExp(`Running update on batch with ids .+${projectId2}`)
|
||||
)
|
||||
expect(stdOut).to.not.match(
|
||||
new RegExp(`Running update on batch with ids .+${projectId3}`)
|
||||
)
|
||||
expect(stdOut).to.not.match(
|
||||
new RegExp(`Running update on batch with ids .+${projectId4}`)
|
||||
)
|
||||
expect(stdOut).to.match(
|
||||
new RegExp(`Running update on batch with ids .+${projectId5}`)
|
||||
)
|
||||
}
|
||||
|
||||
function checkAreFilesBackFilled() {
|
||||
it('should back fill file and set projectId', async function () {
|
||||
const docs = await db.deletedFiles
|
||||
.find({}, { sort: { _id: 1 } })
|
||||
.toArray()
|
||||
expect(docs).to.deep.equal([
|
||||
{ _id: fileId1, projectId: projectId1, ...otherFileDetails },
|
||||
{ _id: fileId2, projectId: projectId1, ...otherFileDetails },
|
||||
{ _id: fileId3, projectId: projectId2, ...otherFileDetails },
|
||||
{ _id: fileId4, projectId: projectId5, ...otherFileDetails },
|
||||
])
|
||||
})
|
||||
}
|
||||
|
||||
describe('back fill only', function () {
|
||||
beforeEach('run script', runScript)
|
||||
|
||||
checkAreFilesBackFilled()
|
||||
|
||||
it('should leave the deletedFiles as is', async function () {
|
||||
expect(await getDeletedFiles(projectId1)).to.deep.equal(deletedFiles1)
|
||||
expect(await getDeletedFiles(projectId2)).to.deep.equal(deletedFiles2)
|
||||
expect(await getDeletedFiles(projectId5)).to.deep.equal(deletedFiles3)
|
||||
})
|
||||
})
|
||||
|
||||
describe('back fill and cleanup', function () {
|
||||
beforeEach('run script with cleanup flag', async function () {
|
||||
await runScript(['--perform-cleanup'])
|
||||
})
|
||||
|
||||
checkAreFilesBackFilled()
|
||||
|
||||
it('should cleanup the deletedFiles', async function () {
|
||||
expect(await getDeletedFiles(projectId1)).to.deep.equal([])
|
||||
expect(await getDeletedFiles(projectId2)).to.deep.equal([])
|
||||
expect(await getDeletedFiles(projectId5)).to.deep.equal([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('fix partial inserts and cleanup', function () {
|
||||
beforeEach('simulate one missing insert', async function () {
|
||||
await setDeletedFiles(projectId1, [deletedFiles1[0]])
|
||||
})
|
||||
beforeEach('run script with cleanup flag', async function () {
|
||||
await runScript(['--perform-cleanup'])
|
||||
})
|
||||
beforeEach('add case for one missing file', async function () {
|
||||
await setDeletedFiles(projectId1, deletedFiles1)
|
||||
})
|
||||
beforeEach('add cases for no more files to insert', async function () {
|
||||
await setDeletedFiles(projectId2, deletedFiles2)
|
||||
await setDeletedFiles(projectId5, deletedFiles3)
|
||||
})
|
||||
|
||||
beforeEach('fixing partial insert and cleanup', async function () {
|
||||
await runScript(['--fix-partial-inserts', '--perform-cleanup'])
|
||||
})
|
||||
|
||||
checkAreFilesBackFilled()
|
||||
|
||||
it('should cleanup the deletedFiles', async function () {
|
||||
expect(await getDeletedFiles(projectId1)).to.deep.equal([])
|
||||
expect(await getDeletedFiles(projectId2)).to.deep.equal([])
|
||||
expect(await getDeletedFiles(projectId5)).to.deep.equal([])
|
||||
})
|
||||
})
|
||||
})
|
||||
@@ -252,81 +252,6 @@ describe('Deleting a project', function () {
|
||||
})
|
||||
})
|
||||
|
||||
describe('when the project has deleted files', function () {
|
||||
beforeEach('get rootFolder id', function (done) {
|
||||
this.user.getProject(this.projectId, (error, project) => {
|
||||
if (error) return done(error)
|
||||
this.rootFolder = project.rootFolder[0]._id
|
||||
done()
|
||||
})
|
||||
})
|
||||
|
||||
let allFileIds
|
||||
beforeEach('reset allFileIds', function () {
|
||||
allFileIds = []
|
||||
})
|
||||
function createAndDeleteFile(name) {
|
||||
let fileId
|
||||
beforeEach(`create file ${name}`, function (done) {
|
||||
this.user.uploadExampleFileInProject(
|
||||
this.projectId,
|
||||
this.rootFolder,
|
||||
name,
|
||||
(error, theFileId) => {
|
||||
fileId = theFileId
|
||||
allFileIds.push(theFileId)
|
||||
done(error)
|
||||
}
|
||||
)
|
||||
})
|
||||
beforeEach(`delete file ${name}`, function (done) {
|
||||
this.user.deleteItemInProject(this.projectId, 'file', fileId, done)
|
||||
})
|
||||
}
|
||||
for (const name of ['a.png', 'another.png']) {
|
||||
createAndDeleteFile(name)
|
||||
}
|
||||
|
||||
it('should have two deleteFiles entries', async function () {
|
||||
const files = await db.deletedFiles
|
||||
.find({}, { sort: { _id: 1 } })
|
||||
.toArray()
|
||||
expect(files).to.have.length(2)
|
||||
expect(files.map(file => file._id.toString())).to.deep.equal(allFileIds)
|
||||
})
|
||||
|
||||
describe('When the deleted project is expired', function () {
|
||||
beforeEach('soft delete the project', function (done) {
|
||||
this.user.deleteProject(this.projectId, done)
|
||||
})
|
||||
beforeEach('hard delete the project', function (done) {
|
||||
request.post(
|
||||
`/internal/project/${this.projectId}/expire-deleted-project`,
|
||||
{
|
||||
auth: {
|
||||
user: settings.apis.web.user,
|
||||
pass: settings.apis.web.pass,
|
||||
sendImmediately: true,
|
||||
},
|
||||
},
|
||||
(error, res) => {
|
||||
expect(error).not.to.exist
|
||||
expect(res.statusCode).to.equal(200)
|
||||
|
||||
done()
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
it('should cleanup the deleteFiles', async function () {
|
||||
const files = await db.deletedFiles
|
||||
.find({}, { sort: { _id: 1 } })
|
||||
.toArray()
|
||||
expect(files).to.deep.equal([])
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('When the project has docs', function () {
|
||||
beforeEach(function (done) {
|
||||
this.user.getProject(this.projectId, (error, project) => {
|
||||
|
||||
@@ -99,10 +99,6 @@ describe('ProjectDeleter', function () {
|
||||
}
|
||||
|
||||
this.db = {
|
||||
deletedFiles: {
|
||||
indexExists: sinon.stub().resolves(false),
|
||||
deleteMany: sinon.stub(),
|
||||
},
|
||||
projects: {
|
||||
insertOne: sinon.stub().resolves(),
|
||||
},
|
||||
|
||||
@@ -4,7 +4,6 @@ const tk = require('timekeeper')
|
||||
const Errors = require('../../../../app/src/Features/Errors/Errors')
|
||||
const { ObjectId } = require('mongodb-legacy')
|
||||
const SandboxedModule = require('sandboxed-module')
|
||||
const { DeletedFile } = require('../helpers/models/DeletedFile')
|
||||
const { Project } = require('../helpers/models/Project')
|
||||
|
||||
const MODULE_PATH =
|
||||
@@ -77,7 +76,6 @@ describe('ProjectEntityMongoUpdateHandler', function () {
|
||||
}
|
||||
|
||||
this.FolderModel = sinon.stub()
|
||||
this.DeletedFileMock = sinon.mock(DeletedFile)
|
||||
this.ProjectMock = sinon.mock(Project)
|
||||
this.ProjectEntityHandler = {
|
||||
getAllEntitiesFromProject: sinon.stub(),
|
||||
@@ -197,7 +195,6 @@ describe('ProjectEntityMongoUpdateHandler', function () {
|
||||
'../Cooldown/CooldownManager': this.CooldownManager,
|
||||
'../../models/Folder': { Folder: this.FolderModel },
|
||||
'../../infrastructure/LockManager': this.LockManager,
|
||||
'../../models/DeletedFile': { DeletedFile },
|
||||
'../../models/Project': { Project },
|
||||
'./ProjectEntityHandler': this.ProjectEntityHandler,
|
||||
'./ProjectLocator': this.ProjectLocator,
|
||||
@@ -208,7 +205,6 @@ describe('ProjectEntityMongoUpdateHandler', function () {
|
||||
})
|
||||
|
||||
afterEach(function () {
|
||||
this.DeletedFileMock.restore()
|
||||
this.ProjectMock.restore()
|
||||
tk.reset()
|
||||
})
|
||||
@@ -374,17 +370,6 @@ describe('ProjectEntityMongoUpdateHandler', function () {
|
||||
linkedFileData: { some: 'data' },
|
||||
hash: 'some-hash',
|
||||
}
|
||||
// Add a deleted file record
|
||||
this.DeletedFileMock.expects('create')
|
||||
.withArgs({
|
||||
projectId: this.project._id,
|
||||
_id: this.file._id,
|
||||
name: this.file.name,
|
||||
linkedFileData: this.file.linkedFileData,
|
||||
hash: this.file.hash,
|
||||
deletedAt: sinon.match.date,
|
||||
})
|
||||
.resolves()
|
||||
// Update the file in place
|
||||
this.ProjectMock.expects('findOneAndUpdate')
|
||||
.withArgs(
|
||||
@@ -421,7 +406,6 @@ describe('ProjectEntityMongoUpdateHandler', function () {
|
||||
})
|
||||
|
||||
it('updates the database', function () {
|
||||
this.DeletedFileMock.verify()
|
||||
this.ProjectMock.verify()
|
||||
})
|
||||
})
|
||||
@@ -1059,29 +1043,6 @@ describe('ProjectEntityMongoUpdateHandler', function () {
|
||||
})
|
||||
})
|
||||
|
||||
describe('_insertDeletedFileReference', function () {
|
||||
beforeEach(async function () {
|
||||
this.DeletedFileMock.expects('create')
|
||||
.withArgs({
|
||||
projectId: this.project._id,
|
||||
_id: this.file._id,
|
||||
name: this.file.name,
|
||||
linkedFileData: this.file.linkedFileData,
|
||||
hash: this.file.hash,
|
||||
deletedAt: sinon.match.date,
|
||||
})
|
||||
.resolves()
|
||||
await this.subject.promises._insertDeletedFileReference(
|
||||
this.project._id,
|
||||
this.file
|
||||
)
|
||||
})
|
||||
|
||||
it('should update the database', function () {
|
||||
this.DeletedFileMock.verify()
|
||||
})
|
||||
})
|
||||
|
||||
describe('createNewFolderStructure', function () {
|
||||
beforeEach(function () {
|
||||
this.mockRootFolder = 'MOCK_ROOT_FOLDER'
|
||||
|
||||
@@ -133,7 +133,6 @@ describe('ProjectEntityUpdateHandler', function () {
|
||||
addFolder: sinon.stub(),
|
||||
_confirmFolder: sinon.stub(),
|
||||
_putElement: sinon.stub(),
|
||||
_insertDeletedFileReference: sinon.stub(),
|
||||
replaceFileWithNew: sinon.stub(),
|
||||
mkdirp: sinon.stub(),
|
||||
moveEntity: sinon.stub(),
|
||||
@@ -2572,7 +2571,6 @@ describe('ProjectEntityUpdateHandler', function () {
|
||||
this.ProjectEntityUpdateHandler.promises.unsetRootDoc = sinon
|
||||
.stub()
|
||||
.resolves()
|
||||
this.ProjectEntityMongoUpdateHandler.promises._insertDeletedFileReference.resolves()
|
||||
})
|
||||
|
||||
describe('a file', function () {
|
||||
@@ -2592,12 +2590,6 @@ describe('ProjectEntityUpdateHandler', function () {
|
||||
)
|
||||
})
|
||||
|
||||
it('should insert the file into the deletedFiles collection', function () {
|
||||
this.ProjectEntityMongoUpdateHandler.promises._insertDeletedFileReference
|
||||
.calledWith(this.project._id, this.entity)
|
||||
.should.equal(true)
|
||||
})
|
||||
|
||||
it('should not delete the file from FileStoreHandler', function () {
|
||||
this.FileStoreHandler.promises.deleteFile
|
||||
.calledWith(projectId, this.entityId)
|
||||
@@ -2696,7 +2688,6 @@ describe('ProjectEntityUpdateHandler', function () {
|
||||
}
|
||||
|
||||
this.ProjectEntityUpdateHandler._cleanUpDoc = sinon.stub().resolves()
|
||||
this.ProjectEntityUpdateHandler._cleanUpFile = sinon.stub().resolves()
|
||||
const path = '/folder'
|
||||
this.newProject = 'new-project'
|
||||
this.subtreeListing =
|
||||
@@ -2711,17 +2702,6 @@ describe('ProjectEntityUpdateHandler', function () {
|
||||
)
|
||||
})
|
||||
|
||||
it('should clean up all sub files', function () {
|
||||
this.ProjectEntityUpdateHandler._cleanUpFile.should.have.been.calledWith(
|
||||
this.project,
|
||||
this.file1
|
||||
)
|
||||
this.ProjectEntityUpdateHandler._cleanUpFile.should.have.been.calledWith(
|
||||
this.project,
|
||||
this.file2
|
||||
)
|
||||
})
|
||||
|
||||
it('should clean up all sub docs', function () {
|
||||
this.ProjectEntityUpdateHandler._cleanUpDoc
|
||||
.calledWith(
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
const mockModel = require('../MockModel')
|
||||
|
||||
module.exports = mockModel('DeletedFile')
|
||||
Reference in New Issue
Block a user