Skip to content

Commit

Permalink
Stream archives to Azure Blob Storage in bbs2gh migrate-repo, rather than loading the whole archive into memory (#978)
Browse files Browse the repository at this point in the history

A `bbs2gh migrate-repo` migration that uses Azure Blob Storage currently
fails when the archive is larger than 2GB, throwing a
`System.IO.IOException` stating:

> The file is too long. This operation is currently limited to
supporting files less than 2 gigabytes in size.

This change streams the archive file to Azure Blob Storage instead of reading
it all into memory, mirroring the approach (and reusing the code!) from `gh gei`.

Fixes #974.

<!--
For the checkboxes below you must check each one to indicate that you
either did the relevant task, or considered it and decided there was
nothing that needed doing
-->

- [x] Did you write/update appropriate tests?
- [x] Release notes updated (if appropriate)
- [x] Appropriate logging output
- [x] Issue linked
- [ ] Docs updated (or issue created)
- [ ] New package licenses are added to `ThirdPartyNotices.txt` (if
applicable)

<!--
For docs we should review the docs at:
https://docs.github.com/en/migrations/using-github-enterprise-importer
and the README.md in this repo

If a doc update is required based on the changes in this PR, it is
sufficient to create an issue and link to it here. The doc update can be
made later/separately.
-->

---------

Co-authored-by: Dylan Smith <dylanfromwinnipeg@gmail.com>
  • Loading branch information
timrogers and dylan-smith authored May 8, 2023
1 parent 7bf64fe commit 103db85
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 16 deletions.
3 changes: 2 additions & 1 deletion RELEASENOTES.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
- Include the migration ID in the default output filename when running `download-logs`
- Include the date with the timestamp when writing to the log
- When blob storage credentials are provided to the CLI but will not be used for a GHES migration, log a clear warning, not an info message
- Unhide the `--archive-download-host` argument in the documentation for `gh bbs2gh migrate-repo` and `gh bbs2gh generate-script`
- Fix support for Azure Blob Storage in `bbs2gh` migrations when the archive is larger than 2GB
- Unhide the `--archive-download-host` argument in the documentation for `gh bbs2gh migrate-repo` and `gh bbs2gh generate-script`
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using System.IO;
using System.Text;
using System.Threading.Tasks;
using FluentAssertions;
Expand Down Expand Up @@ -156,7 +157,7 @@ public async Task Happy_Path_Generate_Archive_Ssh_Download_Azure_Upload_And_Inge
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
_mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockGithubApi.Setup(x => x.GetOrganizationId(GITHUB_ORG).Result).Returns(GITHUB_ORG_ID);
_mockGithubApi.Setup(x => x.CreateBbsMigrationSource(GITHUB_ORG_ID).Result).Returns(MIGRATION_SOURCE_ID);

Expand Down Expand Up @@ -240,8 +241,7 @@ public async Task Happy_Path_Full_Flow_Running_On_Bbs_Server()

_mockBbsApi.Setup(x => x.StartExport(BBS_PROJECT, BBS_REPO)).ReturnsAsync(BBS_EXPORT_ID);
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(archivePath)).ReturnsAsync(ARCHIVE_DATA);
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockGithubApi.Setup(x => x.GetOrganizationId(GITHUB_ORG).Result).Returns(GITHUB_ORG_ID);
_mockGithubApi.Setup(x => x.CreateBbsMigrationSource(GITHUB_ORG_ID).Result).Returns(MIGRATION_SOURCE_ID);

Expand Down Expand Up @@ -296,7 +296,7 @@ public async Task Happy_Path_Full_Flow_Bbs_Credentials_Via_Environment()
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
_mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockGithubApi.Setup(x => x.GetOrganizationId(GITHUB_ORG).Result).Returns(GITHUB_ORG_ID);
_mockGithubApi.Setup(x => x.CreateBbsMigrationSource(GITHUB_ORG_ID).Result).Returns(MIGRATION_SOURCE_ID);

Expand Down Expand Up @@ -335,7 +335,7 @@ public async Task Happy_Path_Deletes_Downloaded_Archive()
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
_mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));

// Act
var args = new MigrateRepoCommandArgs
Expand Down Expand Up @@ -367,7 +367,7 @@ public async Task It_Deletes_Downloaded_Archive_Even_If_Upload_Fails()
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
_mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ThrowsAsync(new InvalidOperationException());
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ThrowsAsync(new InvalidOperationException());

// Act
var args = new MigrateRepoCommandArgs
Expand Down Expand Up @@ -398,7 +398,7 @@ public async Task Happy_Path_Does_Not_Throw_If_Fails_To_Delete_Downloaded_Archiv
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
_mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockFileSystemProvider.Setup(x => x.DeleteIfExists(It.IsAny<string>())).Throws(new UnauthorizedAccessException("Access Denied"));

// Act
Expand Down Expand Up @@ -616,7 +616,7 @@ public async Task Uses_Archive_Path_If_Provided()
var archiveBytes = Encoding.ASCII.GetBytes("here are some bytes");
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(archiveBytes);

_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), archiveBytes)).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));

_mockGithubApi.Setup(x => x.GetOrganizationId(GITHUB_ORG).Result).Returns(GITHUB_ORG_ID);
_mockGithubApi.Setup(x => x.CreateBbsMigrationSource(GITHUB_ORG_ID).Result).Returns(MIGRATION_SOURCE_ID);
Expand Down Expand Up @@ -763,7 +763,7 @@ await _handler.Invoking(x => x.Handle(args))
public async Task It_Does_Not_Set_The_Archive_Path_When_Archive_Path_Is_Provided()
{
// Arrange
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<byte[]>())).ReturnsAsync(new Uri(ARCHIVE_URL));
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));

// Act
var args = new MigrateRepoCommandArgs
Expand All @@ -778,7 +778,7 @@ public async Task It_Does_Not_Set_The_Archive_Path_When_Archive_Path_Is_Provided

// Assert
args.ArchivePath.Should().Be(ARCHIVE_PATH);
_mockFileSystemProvider.Verify(m => m.ReadAllBytesAsync(ARCHIVE_PATH));
_mockFileSystemProvider.Verify(m => m.OpenRead(ARCHIVE_PATH));
}

[Fact]
Expand Down
13 changes: 8 additions & 5 deletions src/bbs2gh/Handlers/MigrateRepoCommandHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,14 @@ private async Task<string> UploadArchiveToAzure(string archivePath)
{
_log.LogInformation("Uploading Archive to Azure...");

var archiveData = await _fileSystemProvider.ReadAllBytesAsync(archivePath);
var archiveName = GenerateArchiveName();
var archiveBlobUrl = await _azureApi.UploadToBlob(archiveName, archiveData);

return archiveBlobUrl.ToString();
#pragma warning disable IDE0063
await using (var archiveData = _fileSystemProvider.OpenRead(archivePath))
#pragma warning restore IDE0063
{
var archiveName = GenerateArchiveName();
var archiveBlobUrl = await _azureApi.UploadToBlob(archiveName, archiveData);
return archiveBlobUrl.ToString();
}
}

/// <summary>
/// Builds the name used for an uploaded archive: a newly generated GUID
/// followed by the ".tar" extension.
/// </summary>
/// <returns>The generated archive name.</returns>
private string GenerateArchiveName()
{
    var uniqueId = Guid.NewGuid();
    return $"{uniqueId}.tar";
}
Expand Down

0 comments on commit 103db85

Please sign in to comment.