Skip to content

Commit

Permalink
Merge pull request #310 from happo/readdir-recursive
Browse files Browse the repository at this point in the history
Refactor archiving to go through single deterministic function
  • Loading branch information
lencioni authored Feb 13, 2025
2 parents 8fdb4f4 + 32ac8b4 commit f4bade3
Show file tree
Hide file tree
Showing 9 changed files with 390 additions and 243 deletions.
124 changes: 18 additions & 106 deletions src/createStaticPackage.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
import { Writable } from 'stream';
import crypto from 'crypto';
import path from 'path';
import fs from 'fs';

import Archiver from 'archiver';

import validateArchive from './validateArchive';
import deterministicArchive from './deterministicArchive';

// We're setting the creation date to the same for all files so that the zip
// packages created for the same content ends up having the same fingerprint.
Expand All @@ -24,107 +17,26 @@ const IFRAME_CONTENT = `
`;

/**
* Gets all files in a directory and all of its subdirectories
*
* The returned value is sorted for deterministic output.
* Creates a static package of the given files
*
* @param {string} dir
* @returns {Array<string>}
* @param {Object} options
* @param {string} options.tmpdir
* @param {string[]} options.publicFolders
* @returns {Promise<{buffer: Buffer, hash: string}>}
*/
/**
 * Collects every entry (files AND directories) under `dir`, recursively.
 *
 * The returned list is sorted with `localeCompare` so that archives built
 * from it are deterministic and hash identically across runs.
 *
 * @param {string} dir
 * @returns {Array<string>} absolute paths of all entries beneath `dir`
 */
function getFilesRecursive(dir) {
  const results = [];
  const pending = [dir];

  while (pending.length > 0) {
    const current = pending.shift();
    // Node 18 adds `recursive` option to `fs.readdirSync`, which would
    // make this function simpler.
    for (const entry of fs.readdirSync(current, { withFileTypes: true })) {
      const absolute = path.resolve(current, entry.name);
      results.push(absolute);
      if (entry.isDirectory()) {
        // Queue subdirectories so their contents get visited as well.
        pending.push(absolute);
      }
    }
  }

  // Deterministic ordering keeps the archive hash stable.
  return results.sort((a, b) => a.localeCompare(b));
}

/**
 * Creates a static package of the given files
 *
 * Delegates to `deterministicArchive` so that the same content always
 * produces the same zip buffer and fingerprint.
 *
 * @param {Object} options
 * @param {string} options.tmpdir
 * @param {string[]} options.publicFolders
 * @returns {Promise<{buffer: Buffer, hash: string}>}
 */
export default async function createStaticPackage({ tmpdir, publicFolders }) {
  const cwd = process.cwd();
  // Only include public folders that live inside the project directory;
  // anything outside the cwd is not intended for upload.
  const filteredPublicFolders = publicFolders.filter((folder) =>
    folder.startsWith(cwd),
  );
  // De-duplicate in case tmpdir is also listed among the public folders.
  const uniqueDirs = Array.from(new Set([tmpdir, ...filteredPublicFolders]));

  return deterministicArchive(uniqueDirs, [
    {
      name: 'iframe.html',
      content: IFRAME_CONTENT,
    },
  ]);
}

export { FILE_CREATION_DATE };
143 changes: 143 additions & 0 deletions src/deterministicArchive.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import { Writable } from 'stream';
import crypto from 'crypto';
import fs from 'fs';
import path from 'path';

import Archiver from 'archiver';

import validateArchive from './validateArchive';

// We're setting the creation date to the same for all files so that the zip
// packages created for the same content ends up having the same fingerprint.
const FILE_CREATION_DATE = new Date('Fri Feb 08 2019 13:31:55 GMT+0100 (CET)');

/**
* Resolves all files in a directory and all of its subdirectories
*
* @param {string} dirOrFile
* @returns {Promise<Array<{name: string, path: string}>>}
*/
/**
 * Resolves all files in a directory and all of its subdirectories
 *
 * A plain file argument resolves to a single entry named after its basename.
 *
 * @param {string} dirOrFile
 * @returns {Promise<Array<{name: string, path: string}>>} entry names are
 *   relative to `dirOrFile`; paths are absolute
 */
async function resolveFilesRecursiveForDir(dirOrFile) {
  // Resolve up-front so the relative-`name` slice below stays correct even
  // when the caller passes a relative path or a trailing separator —
  // `path.resolve(dirent.parentPath, ...)` always yields absolute paths.
  const root = path.resolve(dirOrFile);
  // lstat (not stat): a symlink to a directory is treated as a single file.
  const isDir = (await fs.promises.lstat(root)).isDirectory();

  if (!isDir) {
    return [
      {
        name: path.basename(root),
        path: root,
      },
    ];
  }

  const dirents = await fs.promises.readdir(root, {
    withFileTypes: true,
    recursive: true, // requires Node 18.17+
  });

  return dirents
    .filter((dirent) => dirent.isFile())
    .map((dirent) => {
      // `parentPath` replaces the deprecated `dirent.path` (Node 20.12+);
      // fall back for older runtimes.
      const fullPath = path.resolve(
        dirent.parentPath ?? dirent.path,
        dirent.name,
      );
      return {
        // Archive entry name, relative to the root directory.
        name: fullPath.slice(root.length + 1),
        path: fullPath,
      };
    });
}

/**
* Resolves all files in all directories recursively
*
* @param {...string} dirsAndFiles
* @returns {Promise<Array<{name: string, path: string}>>}
*/
/**
 * Resolves all files in all directories recursively
 *
 * Falsy entries (e.g. an unset optional folder) are skipped.
 *
 * @param {...string} dirsAndFiles
 * @returns {Promise<Array<{name: string, path: string}>>}
 */
async function resolveFilesRecursive(...dirsAndFiles) {
  const nonEmpty = dirsAndFiles.filter(Boolean);
  const perEntry = await Promise.all(
    nonEmpty.map((entry) => resolveFilesRecursiveForDir(entry)),
  );
  return perEntry.flat();
}

/**
* Creates a deterministic archive of the given files
*
* @param {string[]} dirsAndFiles
* @param {{name: string, content: string}[]} contentToArchive
* @returns {Promise<{buffer: Buffer, hash: string}>}
*/
/**
 * Creates a deterministic archive of the given files
 *
 * Entries are added in sorted-name order with a fixed timestamp so that the
 * same content always yields the same zip bytes (and therefore hash).
 *
 * @param {string[]} dirsAndFiles directories and/or individual files to add
 * @param {{name: string, content: string}[]} contentToArchive in-memory
 *   entries to append; a disk file with the same entry name wins
 * @returns {Promise<{buffer: Buffer, hash: string}>}
 */
export default async function deterministicArchive(
  dirsAndFiles,
  contentToArchive = [],
) {
  const uniqueDirsAndFiles = Array.from(new Set(dirsAndFiles));

  // Sort by name to make the output deterministic
  const filesToArchiveSorted = (
    await resolveFilesRecursive(...uniqueDirsAndFiles)
  ).sort((a, b) => a.name.localeCompare(b.name));

  // Copy before sorting — Array#sort is in-place and would otherwise
  // mutate the caller's array.
  const contentToArchiveSorted = [...contentToArchive].sort((a, b) =>
    a.name.localeCompare(b.name),
  );

  return new Promise((resolve, reject) => {
    const archive = new Archiver('zip', {
      // Concurrency in the stat queue leads to non-deterministic output.
      // https://github.com/archiverjs/node-archiver/issues/383#issuecomment-2253139948
      statConcurrency: 1,
      zlib: { level: 6 },
    });

    const stream = new Writable();
    // Collect whole Buffer chunks. Spreading each chunk byte-by-byte into
    // an array is O(bytes) arguments per write and can exceed the engine's
    // argument limit on large chunks.
    const chunks = [];

    stream._write = (chunk, _enc, done) => {
      chunks.push(chunk);
      done();
    };

    const entries = [];
    archive.on('entry', (entry) => {
      entries.push(entry);
    });

    stream.on('finish', () => {
      validateArchive(archive.pointer(), entries);
      const buffer = Buffer.concat(chunks);
      const hash = crypto.createHash('md5').update(buffer).digest('hex');

      resolve({ buffer, hash });
    });
    archive.pipe(stream);

    const seenFiles = new Set();

    // We can't use archive.directory() here because it is not deterministic.
    // https://github.com/archiverjs/node-archiver/issues/383#issuecomment-2252938075
    for (const file of filesToArchiveSorted) {
      if (!seenFiles.has(file.name)) {
        archive.file(file.path, {
          name: file.name,
          prefix: '',
          date: FILE_CREATION_DATE,
        });
        seenFiles.add(file.name);
      }
    }

    // In-memory content comes after files on disk; the first entry added
    // under a given name wins.
    for (const file of contentToArchiveSorted) {
      if (!seenFiles.has(file.name)) {
        archive.append(file.content, {
          name: file.name,
          prefix: '',
          date: FILE_CREATION_DATE,
        });
        seenFiles.add(file.name);
      }
    }

    archive.on('error', reject);
    archive.finalize();
  });
}
1 change: 1 addition & 0 deletions src/domRunner.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ async function uploadStaticPackage({
tmpdir,
publicFolders,
});

const assetsPath = await uploadAssets(buffer, {
apiKey,
apiSecret,
Expand Down
Loading

0 comments on commit f4bade3

Please sign in to comment.