Add S3 data archiver

This commit is contained in:
Matt Isenhower 2023-05-29 16:17:10 -07:00
parent 840d45373a
commit ec783f52ec
5 changed files with 2573 additions and 7 deletions

View File

@ -10,6 +10,12 @@ NINTENDO_TOKEN=
# User agent string
USER_AGENT=
# S3 parameters
AWS_S3_ENDPOINT=
AWS_S3_REGION=
AWS_S3_BUCKET=
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
# Twitter API parameters
TWITTER_CONSUMER_KEY=
TWITTER_CONSUMER_SECRET=

94
app/data/DataArchiver.mjs Normal file
View File

@ -0,0 +1,94 @@
import { PutObjectCommand, S3Client } from '@aws-sdk/client-s3';
import fs from 'fs/promises';
import { basename } from 'path';
import prefixedConsole from "../common/prefixedConsole.mjs";
/**
 * Uploads archived data files to an S3-compatible bucket.
 *
 * Every entry found under `inputDirectory` whose filename contains a
 * `YYYY-MM-DD` date is uploaded under the key `YYYY/MM/DD/<filename>`.
 * When `deleteAfterUpload` is true the local copy is removed once the
 * upload has succeeded.
 *
 * The archiver only runs when all of these environment variables are set:
 * AWS_S3_ENDPOINT, AWS_S3_REGION, AWS_S3_BUCKET, AWS_ACCESS_KEY_ID and
 * AWS_SECRET_ACCESS_KEY.
 */
export default class DataArchiver
{
  /** Directory that is scanned (recursively) for files to archive. */
  inputDirectory = 'storage/archive';

  /** Whether the local file is deleted after a successful upload. */
  deleteAfterUpload = true;

  /**
   * Archives every file in `inputDirectory`, one at a time.
   * Does nothing (besides logging) when the S3 configuration is incomplete.
   *
   * @returns {Promise<void>}
   */
  async process() {
    if (!this.canRun) {
      this.console.log('Skipping data archiver');

      return;
    }

    this.console.log('Archiving data...');

    // Files are handled sequentially so only one upload is in flight.
    for (const file of await this.getFiles()) {
      await this.processFile(file);
    }

    this.console.log('Done!');
  }

  // Properties

  /** Lazily-created console prefixed with "Archiver". */
  get console() {
    this._console ??= prefixedConsole('Archiver');

    return this._console;
  }

  /**
   * Whether every required S3 environment variable is set.
   *
   * @returns {boolean} Always a strict boolean, so the value of a
   *   credential is never handed back to the caller.
   */
  get canRun() {
    return Boolean(
      process.env.AWS_S3_ENDPOINT
      && process.env.AWS_S3_REGION
      && process.env.AWS_S3_BUCKET
      && process.env.AWS_ACCESS_KEY_ID
      && process.env.AWS_SECRET_ACCESS_KEY
    );
  }

  /** Lazily-created S3 client configured from the environment. */
  get s3Client() {
    return this._client ??= new S3Client({
      endpoint: process.env.AWS_S3_ENDPOINT,
      region: process.env.AWS_S3_REGION,
      credentials: {
        accessKeyId: process.env.AWS_ACCESS_KEY_ID,
        secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
      }
    });
  }

  // Helpers

  /**
   * Lists the entries below `inputDirectory`, as paths relative to it.
   *
   * @returns {Promise<string[]>} Relative paths; an empty list when the
   *   directory does not exist yet (nothing has been archived so far).
   */
  async getFiles() {
    try {
      return await fs.readdir(this.inputDirectory, { recursive: true });
    } catch (e) {
      // A missing archive directory just means there is nothing to upload.
      if (e.code === 'ENOENT') {
        return [];
      }

      throw e;
    }
  }

  /**
   * Uploads a single file and optionally deletes the local copy.
   * Entries without a date in their filename are ignored. Upload and
   * delete errors are logged rather than thrown, so one bad file does not
   * abort the whole run.
   *
   * @param {string} path Path of the file relative to `inputDirectory`.
   * @returns {Promise<void>}
   */
  async processFile(path) {
    // basename() copes with whichever separator the platform's readdir used.
    const file = basename(path);

    // Extract the date from the filename
    // Format: 2023-05-01.00-00-00.example.json
    const match = file.match(/\b(\d{4}-\d{2}-\d{2})\b/);

    if (!match) {
      return;
    }

    // 2023-05-01 -> 2023/05/01, used as the object key prefix.
    const prefix = match[1].replaceAll('-', '/');
    const source = `${this.inputDirectory}/${path}`;

    this.console.log(`Uploading file: ${file}`);

    try {
      await this.uploadViaS3(source, `${prefix}/${file}`);

      // Only reached when the upload did not throw.
      if (this.deleteAfterUpload) {
        await fs.unlink(source);
      }
    } catch (e) {
      this.console.error(e);
    }
  }

  /**
   * Sends a local file to the configured bucket as a publicly readable
   * JSON object.
   *
   * @param {string} file Local path of the file to upload.
   * @param {string} destination Object key inside the bucket.
   * @returns {Promise<object>} The result of the S3 client's `send` call.
   */
  async uploadViaS3(file, destination) {
    return this.s3Client.send(new PutObjectCommand({
      Bucket: process.env.AWS_S3_BUCKET,
      Key: destination,
      Body: await fs.readFile(file),
      ACL: 'public-read',
      ContentType: 'application/json',
    }));
  }
}

View File

@ -7,6 +7,7 @@ import { warmCaches } from "./splatnet/index.mjs";
import MastodonClient from './social/clients/MastodonClient.mjs';
import ImageWriter from './social/clients/ImageWriter.mjs';
import BlueskyClient from './social/clients/BlueskyClient.mjs';
import DataArchiver from './data/DataArchiver.mjs';
consoleStamp(console);
dotenv.config();
@ -21,6 +22,7 @@ const actions = {
splatnet: updatePrimary,
splatnetAll: updateAll,
warmCaches,
dataArchive: () => (new DataArchiver).process(),
}
const command = process.argv[2];

2474
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -14,10 +14,12 @@
"social:test:bluesky": "node app/index.mjs socialTestBluesky",
"splatnet": "node app/index.mjs splatnet",
"splatnet:all": "node app/index.mjs splatnetAll",
"warmCaches": "node app/index.mjs warmCaches"
"warmCaches": "node app/index.mjs warmCaches",
"data:archive": "node app/index.mjs dataArchive"
},
"dependencies": {
"@atproto/api": "^0.2.9",
"@aws-sdk/client-s3": "^3.341.0",
"@headlessui/vue": "^1.7.3",
"@heroicons/vue": "^2.0.12",
"@intlify/vite-plugin-vue-i18n": "^6.0.3",