feat(db): periodically exports the database
vmarseguerra committed Jun 19, 2023
1 parent 0f38d20 commit 9a62869
Showing 22 changed files with 3,499 additions and 128 deletions.
21 changes: 21 additions & 0 deletions api/controllers/v1/caver/get-db-export.js
@@ -0,0 +1,21 @@
+const ControllerService = require('../../../services/ControllerService');
+const FileService = require('../../../services/FileService');
+const exportUtils = require('../../../../script/dbExport/utils');
+
+module.exports = async (req, res) => {
+  const SIX_HOUR_MS = 1000 * 60 * 60 * 6;
+
+  const url = FileService.dbExport.getUrl(
+    exportUtils.EXPORT_FILE_NAME,
+    SIX_HOUR_MS
+  );
+  const metadata = await FileService.dbExport.getMetadata();
+
+  return ControllerService.treat(
+    req,
+    null,
+    { url, ...metadata },
+    { controllerMethod: 'CaverController.getDbExport' },
+    res
+  );
+};
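For context, the endpoint's response combines the six-hour SAS link with the metadata written by FileService.dbExport.setMetadata. A minimal consumer sketch (the route path is an assumption based on the controller's location; url, lastUpdate and size come from the code above):

// Hypothetical client of the new endpoint (route path assumed, not part of this commit)
const res = await fetch('/api/v1/caver/db-export');
const { url, lastUpdate, size } = await res.json();
// `url` is a read-only SAS link that expires after six hours
console.log(`Last export: ${lastUpdate} (${size} bytes) -> ${url}`);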
2 changes: 1 addition & 1 deletion api/controllers/v1/document/create.js
@@ -43,7 +43,7 @@ module.exports = async (req, res) => {
   try {
     await Promise.all(
       files.map(async (file) => {
-        await FileService.create(file, createdDocument.id);
+        await FileService.document.create(file, createdDocument.id);
       })
     );
   } catch (err) {
12 changes: 7 additions & 5 deletions api/controllers/v1/document/multiple-validate.js
@@ -70,17 +70,19 @@ async function validateAndUpdateDocument(
   // they just need to be linked to the document.
   if (newFiles) {
     filePromises.push(
-      ...newFiles.map((f) =>
-        FileService.updateOne(f.id).set({ isValidated: true })
-      )
+      ...newFiles.map((f) => TFile.updateOne(f.id).set({ isValidated: true }))
     );
   }
   if (modifiedFiles) {
-    filePromises.push(...modifiedFiles.map((f) => FileService.update(f)));
+    filePromises.push(
+      ...modifiedFiles.map((f) => FileService.document.update(f))
+    );
   }
 
   if (deletedFiles) {
-    filePromises.push(...deletedFiles.map((f) => FileService.delete(f)));
+    filePromises.push(
+      ...deletedFiles.map((f) => FileService.document.delete(f))
+    );
   }
   await Promise.all(filePromises);
 });
2 changes: 1 addition & 1 deletion api/controllers/v1/document/update.js
@@ -43,7 +43,7 @@ module.exports = async (req, res) => {
   try {
     await Promise.all(
       files.map(async (file) => {
-        const createdFile = await FileService.create(
+        const createdFile = await FileService.document.create(
           file,
           req.param('id'),
           true,
2 changes: 1 addition & 1 deletion api/services/DocumentService.js
@@ -393,7 +393,7 @@ module.exports = {
         )
       );
       if (file) {
-        await FileService.create(file, document.id);
+        await FileService.document.create(file, document.id);
       }
     }
   }
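The call-site changes in document/create.js, document/multiple-validate.js, document/update.js and DocumentService.js all apply the same refactor: document file operations now live under a FileService.document namespace. A sketch of the resulting surface, with signatures inferred from these call sites (argument values are illustrative only):

// Inferred usage of the namespaced API (values are hypothetical)
await FileService.document.create(multerFile, documentId); // uploads the blob and creates the TFile record
await FileService.document.update({ id: 1, fileName: 'new-name.pdf' }); // renames the TFile record
await FileService.document.delete({ id: 1, path: 'abc123-old.pdf' }); // deletes the blob and the TFile record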
239 changes: 163 additions & 76 deletions api/services/FileService.js
@@ -1,21 +1,39 @@
 const {
   BlobServiceClient,
   StorageSharedKeyCredential,
+  BlobSASPermissions,
+  generateBlobSASQueryParameters,
 } = require('@azure/storage-blob');
+const stream = require('stream');
 const ramda = require('ramda');
 
-const AZURE_CONTAINER = 'documents';
 const AZURE_ACCOUNT = 'grottocenter';
-const { AZURE_KEY = '', AZURE_LINK = '' } = process.env;
+const AZURE_CONTAINER_DOCUMENTS = 'documents';
+const AZURE_CONTAINER_DB_SNAPSHOTS = 'db-exports';
 
-const sharedKeyCredential = new StorageSharedKeyCredential(
-  AZURE_ACCOUNT,
-  AZURE_KEY
-);
-const blobServiceClient =
-  !ramda.isEmpty(AZURE_LINK) &&
-  !ramda.isEmpty(AZURE_KEY) &&
-  new BlobServiceClient(AZURE_LINK, sharedKeyCredential);
+const { AZURE_KEY = '' } = process.env;
+
+let credentials = null;
+
+if (AZURE_KEY) {
+  const sharedKeyCredential = new StorageSharedKeyCredential(
+    AZURE_ACCOUNT,
+    AZURE_KEY
+  );
+  const blobServiceClient = new BlobServiceClient(
+    `https://${AZURE_ACCOUNT}.blob.core.windows.net/`,
+    sharedKeyCredential
+  );
+  credentials = {
+    sharedKeyCredential,
+    dbExportBlobClient: blobServiceClient.getContainerClient(
+      AZURE_CONTAINER_DB_SNAPSHOTS
+    ),
+    documentsBlobClient: blobServiceClient.getContainerClient(
+      AZURE_CONTAINER_DOCUMENTS
+    ),
+  };
+}
 
 const INVALID_FORMAT = 'INVALID_FORMAT';
 const INVALID_NAME = 'INVALID_NAME';
@@ -34,54 +52,83 @@ const generateName = (fileName) => {
   return `${identifier}-${newFileName}`;
 };
 
+function noCredentialWarning(name, args) {
+  const fmtArgs = Object.entries(args)
+    .map((e) => e.join(': '))
+    .join(', ');
+  sails.log.warn(`Azure ${name}: missing credentials, ${fmtArgs}`);
+  return null;
+}
+
+function getSignedReadUrl(container, path, expiresOnMs) {
+  const sasQuery = generateBlobSASQueryParameters(
+    {
+      blobName: path,
+      containerName: container,
+      expiresOn: new Date(Date.now() + expiresOnMs),
+      permissions: BlobSASPermissions.parse('r'),
+    },
+    credentials.sharedKeyCredential
+  );
+
+  return `https://${AZURE_ACCOUNT}.blob.core.windows.net/${container}/${path}?${sasQuery.toString()}`;
+}
+
 module.exports = {
   INVALID_FORMAT,
   INVALID_NAME,
   ERROR_DURING_UPLOAD_TO_AZURE,
-  getAzureData: () => ({
-    linkAccount: AZURE_LINK,
-    container: AZURE_CONTAINER,
-  }),
 
-  // File is a multer object : https://github.com/expressjs/multer#file-information
-  /**
-   *
-   * @param {*} file
-   * @param {*} idDocument
-   * @param {*} fetchResult
-   * @param {*} isValidated
-   * @throws {FileError}
-   * @returns
-   */
-  // eslint-disable-next-line consistent-return
-  create: async (file, idDocument, fetchResult = false, isValidated = true) => {
-    const name = file.originalname;
-    const mimeType = file.mimetype;
-    const pathName = generateName(name);
-    const nameSplit = name.split('.');
-    if (nameSplit.length !== 2) {
-      throw new FileError(INVALID_NAME, name);
-    }
-    const extension = nameSplit[1];
-    const foundFormat = await TFileFormat.find({
-      mimeType,
-      extension: extension.toLowerCase(),
-    }).limit(1);
-    if (ramda.isEmpty(foundFormat)) {
-      throw new FileError(INVALID_FORMAT, name);
-    }
-
-    if (blobServiceClient) {
-      const containerClient =
-        blobServiceClient.getContainerClient(AZURE_CONTAINER);
-      const blockBlobClient = containerClient.getBlockBlobClient(pathName);
-
+  isCredentials: !!credentials,
+
+  document: {
+    getUrl(path) {
+      // The documents container allows anonymous access
+      return `https://${AZURE_ACCOUNT}.blob.core.windows.net/${AZURE_CONTAINER_DOCUMENTS}/${path}`;
+    },
+
+    // File is a multer object: https://github.com/expressjs/multer#file-information
+    /**
+     *
+     * @param {*} file
+     * @param {*} idDocument
+     * @param {*} fetchResult
+     * @param {*} isValidated
+     * @throws {FileError}
+     * @returns
+     */
+    // eslint-disable-next-line consistent-return
+    async create(file, idDocument, fetchResult = false, isValidated = true) {
+      const name = file.originalname;
+      const mimeType = file.mimetype;
+      const pathName = generateName(name);
+      const nameSplit = name.split('.');
+      if (nameSplit.length !== 2) {
+        throw new FileError(INVALID_NAME, name);
+      }
+
+      const foundFormat = await TFileFormat.find({
+        mimeType,
+        extension: nameSplit[1].toLowerCase(),
+      }).limit(1);
+      if (ramda.isEmpty(foundFormat)) {
+        throw new FileError(INVALID_FORMAT, name);
+      }
+
+      if (!credentials) {
+        return noCredentialWarning('Document upload', {
+          name,
+          mimeType,
+          size: file.size,
+        });
+      }
+
+      sails.log.info(`Uploading ${name} to Azure Blob...`);
       try {
+        const blockBlobClient =
+          credentials.documentsBlobClient.getBlockBlobClient(pathName);
         await blockBlobClient.uploadData(file.buffer, {
-          blobHTTPHeaders: {
-            blobContentType: mimeType,
-          },
+          blobHTTPHeaders: { blobContentType: mimeType },
         });
       } catch (err) {
         throw new FileError(ERROR_DURING_UPLOAD_TO_AZURE, name);
@@ -100,36 +147,76 @@ module.exports = {
         return createdFile;
       }
       await TFile.create(param);
-    }
-    sails.log(
-      `===== FILES UPLOAD AZURE - DEBUG =====
-      You are seeing this message because you didn't configure your Azure credentials locally. On the production website, the following file would have been uploaded to the Azure container.
-      FILE NAME : ${name}
-      MIME TYPE : ${mimeType}
-      SIZE : ${file.size} bytes
-      `
-    );
-  },
+    },
 
-  update: async (file) => {
-    const res = await TFile.updateOne(file.id).set({
-      fileName: file.fileName,
-    });
-    return res;
-  },
+    async update(file) {
+      const res = await TFile.updateOne(file.id).set({
+        fileName: file.fileName,
+      });
+      return res;
+    },
 
-  delete: async (file) => {
-    const pathName = file.path;
-
-    const containerClient =
-      blobServiceClient.getContainerClient(AZURE_CONTAINER);
-    const blockBlobClient = containerClient.getBlockBlobClient(pathName);
-    await blockBlobClient.delete({
-      deleteSnapshots: 'include',
-    });
-
-    const destroyedRecord = await TFile.destroyOne(file.id);
-    return destroyedRecord;
-  },
+    async delete(file) {
+      const blockBlobClient =
+        credentials.documentsBlobClient.getBlockBlobClient(file.path);
+      await blockBlobClient.delete({ deleteSnapshots: 'include' });
+
+      const destroyedRecord = await TFile.destroyOne(file.id);
+      return destroyedRecord;
+    },
+  },
+
+  dbExport: {
+    getUrl(path, expiresOnMs) {
+      if (!credentials) return noCredentialWarning('dbExport getUrl', { path });
+      return getSignedReadUrl(AZURE_CONTAINER_DB_SNAPSHOTS, path, expiresOnMs);
+    },
+
+    async getMetadata() {
+      if (!credentials) return noCredentialWarning('dbExport getMetadata', {});
+      const metadataBlobClient =
+        credentials.dbExportBlobClient.getBlockBlobClient(
+          'exportMetadata.json'
+        );
+      const response = await metadataBlobClient.download();
+      let data = '';
+      for await (const chunk of response.readableStreamBody) data += chunk;
+      return JSON.parse(data);
+    },
+
+    async setMetadata(archiveSize) {
+      if (!credentials) return noCredentialWarning('dbExport setMetadata', {});
+      const metadataBlobClient =
+        credentials.dbExportBlobClient.getBlockBlobClient(
+          'exportMetadata.json'
+        );
+      const dataStr = JSON.stringify({
+        lastUpdate: new Date().toISOString(),
+        size: archiveSize,
+      });
+      await metadataBlobClient.upload(dataStr, dataStr.length);
+      return null;
+    },
+
+    upload(filename, mimeType) {
+      if (!credentials)
+        return noCredentialWarning('dbExport upload', { filename });
+
+      const ONE_MEGABYTE = 1024 * 1024;
+      const BUFFER_SIZE = 2 * ONE_MEGABYTE;
+      const MAX_BUFFERS = 3;
+
+      try {
+        const aStream = stream.PassThrough();
+        const blockBlobClient =
+          credentials.dbExportBlobClient.getBlockBlobClient(filename);
+        blockBlobClient.uploadStream(aStream, BUFFER_SIZE, MAX_BUFFERS, {
+          blobHTTPHeaders: { blobContentType: mimeType },
+        });
+        return aStream;
+      } catch (err) {
+        throw new FileError(ERROR_DURING_UPLOAD_TO_AZURE, filename);
+      }
+    },
+  },
 };
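dbExport.upload is the hook the periodic export script is meant to drive: it returns a PassThrough stream wired into uploadStream, so the archive can be piped to Azure without being buffered in memory. A sketch of a job using it (the export script itself lives in script/dbExport and is not shown in this excerpt; the require paths and mime type are assumptions):

// Hypothetical export job driving the new dbExport API
const fs = require('fs');
const FileService = require('../api/services/FileService');
const { EXPORT_FILE_NAME } = require('./utils');

async function pushExport(localArchivePath) {
  const azureStream = FileService.dbExport.upload(EXPORT_FILE_NAME, 'application/zip');
  if (!azureStream) return; // no credentials configured: upload() only logged a warning

  // Pipe the local archive through the PassThrough returned by upload()
  fs.createReadStream(localArchivePath).pipe(azureStream);
  await new Promise((resolve, reject) =>
    azureStream.on('finish', resolve).on('error', reject)
  );

  // Record size and timestamp so the caver/get-db-export endpoint can report them
  const { size } = fs.statSync(localArchivePath);
  await FileService.dbExport.setMetadata(size);
}

One caveat worth noting: upload() neither returns nor awaits the promise from uploadStream, so waiting for the stream's 'finish' event, as above, only confirms the local write side is done, not that the blob is fully committed on Azure.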
11 changes: 4 additions & 7 deletions api/services/mapping/converters.js
@@ -556,13 +556,10 @@ const c = {
     return result;
   },
 
-  toFile: (source) => {
-    const { container, linkAccount } = FileService.getAzureData();
-    return {
-      ...source,
-      completePath: `${linkAccount}/${container}/${source.path}`,
-    };
-  },
+  toFile: (source) => ({
+    ...source,
+    completePath: FileService.document.getUrl(source.path),
+  }),
 
   toSimpleHistory: (source) => {
     const result = {
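With getAzureData removed, URL construction is delegated to FileService.document.getUrl, keeping the anonymous-access documents URL format in a single place. For illustration (input values hypothetical):

// c.toFile({ id: 42, path: 'abc123-topo.pdf' }) now yields:
// {
//   id: 42,
//   path: 'abc123-topo.pdf',
//   completePath: 'https://grottocenter.blob.core.windows.net/documents/abc123-topo.pdf',
// }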
22 changes: 22 additions & 0 deletions assets/dbExport/license_en.txt
@@ -0,0 +1,22 @@
+License
+
+The data contained in the files of this archive comes from the grottocenter.org website and must be used in
+accordance with the applicable license, namely:
+● CC-BY-SA for data subject to copyright (https://creativecommons.org/licenses/by-sa/3.0/).
+● ODbL for other data (https://opendatacommons.org/licenses/odbl/).
+
+Special cases: the files archived on grottocenter.org, whose URLs are indicated in these files, are placed under
+various free licenses, which are specified in each case.
+
+To comply with each of these licenses, you can refer to the texts that describe them.
+
+In summary and in general:
+● You can use this data for a website, an application or any other purpose, in a context where the data is
+freely accessible, without user authentication;
+● You must indicate the license that applies to the data you offer. It must therefore be compatible with the
+licenses retained for the data of the grottocenter.org site that you use.
+● You must also credit the authors:
+○ For data under the ODbL license, by adding the mention "© Grottocenter and its contributors";
+○ For data under the CC-BY-SA license, you can use the name of the original author and that of the last
+contributor, which are indicated in the attached file. For cavities you can use the snapshot page.
+For example, for the cavity whose ID is 37497, you can mention "© Grottocenter (authors)".