From 39e9ce97c3eeb8d76b52b4dfab581c9abea59d24 Mon Sep 17 00:00:00 2001 From: ElaineDeMattosSilvaB Date: Tue, 11 Nov 2025 18:08:52 +0100 Subject: [PATCH] feat: include files with tokens in backup production-snapshots Signed-off-by: ElaineDeMattosSilvaB --- tools/blobstorage-backupdata/BackupJob.cs | 23 ++- .../blobstorage-backupdata/BackupJobTests.cs | 145 +++++++++++++++++- 2 files changed, 164 insertions(+), 4 deletions(-) diff --git a/tools/blobstorage-backupdata/BackupJob.cs b/tools/blobstorage-backupdata/BackupJob.cs index 66e4599..7516bdf 100644 --- a/tools/blobstorage-backupdata/BackupJob.cs +++ b/tools/blobstorage-backupdata/BackupJob.cs @@ -39,7 +39,7 @@ public async Task ProcessJob() var existingIndex = await GetIndex(); var changesIndex = new SortedDictionary>(); var database = MongoClient.GetDatabase("clearlydefined", null); - var collection = database.GetCollection("definitions-trimmed"); + var collection = database.GetCollection("definitions-paged"); // lambda to enable lazy evaluation, avoiding the `IndexOutOfRangeException` exception on empty `existingIndex` var beginningDateFilter = () => @@ -71,6 +71,21 @@ public async Task ProcessJob() await SaveData(cursor, existingIndex, changesIndex); } + private JObject FilterFilesWithoutToken(JObject definition) + { + if (definition["files"] is not JArray filesArray) + { + return definition; + } + + var originalCount = filesArray.Count; + var filteredFiles = new JArray(filesArray.Where(file => file["token"] != null)); + + definition["files"] = filteredFiles; + + return definition; + } + private async Task SaveData( IAsyncCursor cursor, string[] existingIndex, @@ -104,6 +119,10 @@ await Parallel.ForEachAsync(cursor.Current, async (document, _) => { throw new Exception("Failed to deserialize the document."); } + + // Filter files from files array without token + jObject = FilterFilesWithoutToken(jObject); + var blobName = jObject.GetBlobName(); if (string.IsNullOrWhiteSpace(blobName)) { @@ -206,4 +225,4 @@ public string RenderFilter(FilterDefinition filter, IMongoCollecti { return filter.Render(collection.DocumentSerializer, collection.Settings.SerializerRegistry).ToString(); } -} \ No newline at end of file +} diff --git a/tools/blobstorage-backupdata/BackupJobTests.cs b/tools/blobstorage-backupdata/BackupJobTests.cs index b77bb94..589c6fa 100644 --- a/tools/blobstorage-backupdata/BackupJobTests.cs +++ b/tools/blobstorage-backupdata/BackupJobTests.cs @@ -173,7 +173,7 @@ public void SetUp() var mockMongoCollection = new Mock>(); mockMongoClient.Setup(x => x.GetDatabase("clearlydefined", null)).Returns(mockDatabase.Object); - mockDatabase.Setup(x => x.GetCollection("definitions-trimmed", null)).Returns(mockMongoCollection.Object); + mockDatabase.Setup(x => x.GetCollection("definitions-paged", null)).Returns(mockMongoCollection.Object); mockMongoCollection .Setup(x => x.FindAsync(It.IsAny>(), It.IsAny>(), default)) .Callback((FilterDefinition filter, FindOptions options, CancellationToken token) => { @@ -303,4 +303,145 @@ public void TestSaveData_ShouldExcludeCurrentHour() { .Select(x => BsonDocument.Parse(x.Value)) .Should().HaveCount(1); } -} \ No newline at end of file + [Test] + public void TestFilterFilesWithoutToken_RemovesFilesWithoutToken() + { + var backupJob = new BackupJob( + mockBlobContainerClient.Object, + mockMongoClient.Object, + DateTime.UtcNow, + loggerFactory, + new MockFilterRenderer() + ); + + var data = new Dictionary + { + { indexPath, "" }, + { "changes/2023-01-01-00", null }, + { "npm/npmjs/-/test-package/1.0.0.json", null }, + }; + SetupMockBlobClient(mockBlobContainerClient, data); + + var definitionWithFiles = """ + { + "_id": "npm/npmjs/-/test-package/1.0.0", + "_meta": {"updated": "2023-01-01T00:00:00Z"}, + "described": {"files": 5}, + "files": [ + { + "path": "package/LICENSE", + "license": "MIT", + "token": "abc123token" + }, + { + "path": "package/README.md", + "license": "MIT", + "token": "def456token" + }, + { + "path": "package/index.js" + + }, + { + "path": "package/test.js" + } + ] + } + """; + + var bsonDefinitions = new List { BsonDocument.Parse(definitionWithFiles) }; + mockCursor.Initialize(new List> { bsonDefinitions }); + + backupJob.ProcessJob().Wait(); + + // Verify the uploaded definition + var uploadedDefinition = JObject.Parse(data["npm/npmjs/-/test-package/1.0.0.json"]!); + var filesArray = uploadedDefinition["files"] as JArray; + + // Should only contain files with token + filesArray.Should().NotBeNull(); + filesArray.Should().HaveCount(2); + filesArray!.All(f => f["token"] != null).Should().BeTrue(); + + // Verify the paths of remaining files + var remainingPaths = filesArray.Select(f => f["path"]?.ToString()).ToList(); + remainingPaths.Should().BeEquivalentTo(new[] { "package/LICENSE", "package/README.md" }); + } + + [Test] + public void TestFilterFilesWithoutToken_HandlesNoFilesArray() + { + var backupJob = new BackupJob( + mockBlobContainerClient.Object, + mockMongoClient.Object, + DateTime.UtcNow, + loggerFactory, + new MockFilterRenderer() + ); + + var data = new Dictionary + { + { indexPath, "" }, + { "changes/2023-01-01-00", null }, + { "npm/npmjs/-/no-files/1.0.0.json", null }, + }; + SetupMockBlobClient(mockBlobContainerClient, data); + + var definitionWithoutFiles = """ + { + "_id": "npm/npmjs/-/no-files/1.0.0", + "_meta": {"updated": "2023-01-01T00:00:00Z"}, + "described": {"releaseDate": "2023-01-01"} + } + """; + + var bsonDefinitions = new List { BsonDocument.Parse(definitionWithoutFiles) }; + mockCursor.Initialize(new List> { bsonDefinitions }); + + Assert.DoesNotThrow(() => backupJob.ProcessJob().Wait()); + + var uploadedDefinition = JObject.Parse(data["npm/npmjs/-/no-files/1.0.0.json"]!); + uploadedDefinition["files"].Should().BeNull(); + } + + [Test] + public void TestFilterFilesWithoutToken_HandlesEmptyFilesArray() + { + var backupJob = new BackupJob( + mockBlobContainerClient.Object, + mockMongoClient.Object, + DateTime.UtcNow, + loggerFactory, + new MockFilterRenderer() + ); + + var data = new Dictionary + { + { indexPath, "" }, + { "changes/2023-01-01-00", null }, + { "npm/npmjs/-/empty-files/1.0.0.json", null }, + }; + SetupMockBlobClient(mockBlobContainerClient, data); + + var definitionWithEmptyFiles = """ + { + "_id": "npm/npmjs/-/empty-files/1.0.0", + "_meta": {"updated": "2023-01-01T00:00:00Z"}, + "described": {"files": 0}, + "files": [] + } + """; + + var bsonDefinitions = new List + { + BsonDocument.Parse(definitionWithEmptyFiles), + }; + mockCursor.Initialize(new List> { bsonDefinitions }); + + backupJob.ProcessJob().Wait(); + + var uploadedDefinition = JObject.Parse(data["npm/npmjs/-/empty-files/1.0.0.json"]!); + var filesArray = uploadedDefinition["files"] as JArray; + filesArray.Should().NotBeNull().And.BeEmpty(); + } +}