diff --git a/app/common/fs.mjs b/app/common/fs.mjs
index c673623..8f93c15 100644
--- a/app/common/fs.mjs
+++ b/app/common/fs.mjs
@@ -1,10 +1,16 @@
 import fs from 'fs/promises';
+import vfs from './vfs.mjs';
 
 export function mkdirp(dir) {
   return fs.mkdir(dir, { recursive: true });
 }
 
 export async function exists(file) {
+  const vfsResult = vfs.has(file);
+  if (vfsResult !== null) {
+    return vfsResult;
+  }
+
   try {
     await fs.access(file);
     return true;
@@ -16,6 +22,11 @@ export async function exists(file) {
 
 // Determine whether a file is older than a given cutoff date (or doesn't exist)
 export async function olderThan(file, cutoff) {
+  const mtime = vfs.getMtime(file);
+  if (mtime !== null) {
+    return mtime < cutoff;
+  }
+
   try {
     let stat = await fs.stat(file);
     return stat.mtime < cutoff;
diff --git a/app/common/vfs.mjs b/app/common/vfs.mjs
new file mode 100644
index 0000000..1b64f59
--- /dev/null
+++ b/app/common/vfs.mjs
@@ -0,0 +1,142 @@
+import { S3Client, ListObjectsV2Command } from '@aws-sdk/client-s3';
+
+class VirtualFileSystem {
+  // Map of S3 key to { lastModified: Date, size: number }
+  _listing = new Map();
+  _loaded = false;
+  _trackedPrefixes = [];
+  _localPrefix = 'dist';
+
+  get _canUseS3() {
+    return !!(
+      process.env.AWS_ACCESS_KEY_ID &&
+      process.env.AWS_SECRET_ACCESS_KEY &&
+      process.env.AWS_S3_BUCKET
+    );
+  }
+
+  get _s3Client() {
+    return this.__s3Client ??= new S3Client({
+      endpoint: process.env.AWS_S3_ENDPOINT,
+      region: process.env.AWS_REGION,
+      credentials: {
+        accessKeyId: process.env.AWS_ACCESS_KEY_ID,
+        secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
+      },
+    });
+  }
+
+  /**
+   * Load S3 listing for the given prefixes.
+   * Call once at startup before any exists/olderThan checks.
+   * @param {string[]} prefixes
+   */
+  async loadFromS3(prefixes) {
+    if (!this._canUseS3) {
+      return;
+    }
+
+    this._trackedPrefixes = prefixes;
+    const bucket = process.env.AWS_S3_BUCKET;
+
+    console.log('[VFS] Loading S3 listing...');
+
+    for (const prefix of prefixes) {
+      let continuationToken;
+      let count = 0;
+
+      do {
+        const response = await this._s3Client.send(new ListObjectsV2Command({
+          Bucket: bucket,
+          Prefix: prefix,
+          ContinuationToken: continuationToken,
+        }));
+
+        for (const obj of response.Contents ?? []) {
+          this._listing.set(obj.Key, {
+            lastModified: obj.LastModified,
+            size: obj.Size,
+          });
+          count++;
+        }
+
+        continuationToken = response.IsTruncated
+          ? response.NextContinuationToken
+          : undefined;
+      } while (continuationToken);
+
+      console.log(`[VFS] Loaded ${count} entries for prefix "${prefix}"`);
+    }
+
+    this._loaded = true;
+  }
+
+  /**
+   * Check if a local file path is known to exist in the VFS listing.
+   * Returns true/false if the path is within a tracked prefix,
+   * or null if VFS has no opinion (not loaded, or path outside tracked prefixes).
+   * @param {string} localPath
+   */
+  has(localPath) {
+    if (!this._loaded) return null;
+
+    const key = this._localPathToKey(localPath);
+    if (key === null) return null;
+    if (!this._isTrackedKey(key)) return null;
+
+    return this._listing.has(key);
+  }
+
+  /**
+   * Get the last modified time for a file from the S3 listing.
+   * Returns Date if found, null if not tracked or VFS not loaded.
+   * @param {string} localPath
+   */
+  getMtime(localPath) {
+    if (!this._loaded) return null;
+
+    const key = this._localPathToKey(localPath);
+    if (key === null) return null;
+    if (!this._isTrackedKey(key)) return null;
+
+    const entry = this._listing.get(key);
+    return entry ? entry.lastModified : null;
+  }
+
+  /**
+   * Track a file that was just written locally.
+   * Ensures subsequent has() calls return true without hitting disk.
+   * @param {string} localPath
+   */
+  track(localPath) {
+    if (!this._loaded) return;
+
+    const key = this._localPathToKey(localPath);
+    if (key === null) return;
+
+    this._listing.set(key, {
+      lastModified: new Date(),
+      size: 0,
+    });
+  }
+
+  /**
+   * Convert a local path (e.g. 'dist/assets/splatnet/foo.png')
+   * to an S3 key (e.g. 'assets/splatnet/foo.png').
+   * @param {string} localPath
+   */
+  _localPathToKey(localPath) {
+    const prefix = this._localPrefix + '/';
+    if (localPath.startsWith(prefix)) {
+      return localPath.slice(prefix.length);
+    }
+    return null;
+  }
+
+  _isTrackedKey(key) {
+    return this._trackedPrefixes.some(prefix => key.startsWith(prefix));
+  }
+}
+
+const vfs = new VirtualFileSystem();
+export default vfs;
diff --git a/app/data/ImageProcessor.mjs b/app/data/ImageProcessor.mjs
index 254b36f..343949c 100644
--- a/app/data/ImageProcessor.mjs
+++ b/app/data/ImageProcessor.mjs
@@ -4,6 +4,7 @@
 import PQueue from 'p-queue';
 import prefixedConsole from '../common/prefixedConsole.mjs';
 import { normalizeSplatnetResourcePath } from '../common/util.mjs';
 import { exists, mkdirp } from '../common/fs.mjs';
+import vfs from '../common/vfs.mjs';
 
 const queue = new PQueue({ concurrency: 4 });
@@ -71,6 +72,7 @@ export default class ImageProcessor
 
       await mkdirp(path.dirname(this.localPath(destination)));
       await fs.writeFile(this.localPath(destination), result.body);
+      vfs.track(this.localPath(destination));
     } catch (e) {
       this.console.error(`Image download failed for ${destination}`, e);
     }
diff --git a/app/data/updaters/DataUpdater.mjs b/app/data/updaters/DataUpdater.mjs
index 46c546c..6573778 100644
--- a/app/data/updaters/DataUpdater.mjs
+++ b/app/data/updaters/DataUpdater.mjs
@@ -268,8 +268,12 @@ export default class DataUpdater
 
       const filename = images[event.imageUrl];
       if (filename) {
-        const data = await fs.readFile(this.imageProcessor.localPath(filename));
-        imageData[event.imageUrl] = data;
+        try {
+          const data = await fs.readFile(this.imageProcessor.localPath(filename));
+          imageData[event.imageUrl] = data;
+        } catch {
+          // Image not available locally (may only exist in S3); skip inline embed
+        }
       }
     }
 
diff --git a/app/sync/S3Syncer.mjs b/app/sync/S3Syncer.mjs
index 911eac6..1e0c070 100644
--- a/app/sync/S3Syncer.mjs
+++ b/app/sync/S3Syncer.mjs
@@ -10,7 +10,7 @@ export default class S3Syncer
     return Promise.all([
       dist && this.syncClient.sync(this.publicBucket, `${this.localPath}/dist`, {
-        filters: this.filters,
+        filters: this.downloadFilters,
       }),
       storage && this.syncClient.sync(this.privateBucket, `${this.localPath}/storage`, {
         filters: this.privateFilters,
       }),
@@ -76,6 +76,20 @@
     ];
   }
 
+  // Download filters skip files that are handled by the VFS layer
+  // (existence/mtime checks only, no content reads needed)
+  get downloadFilters() {
+    return [
+      { exclude: () => true }, // Exclude everything by default
+      // assets/splatnet/ - handled by VFS (existence checks only)
+      // status-screenshots/ - regenerated by social posting
+      { include: (key) => key.startsWith('data/') },
+      { exclude: (key) => key.startsWith('data/archive/') },
+      { exclude: (key) => key.startsWith('data/xrank/') },
+      { exclude: (key) => key.startsWith('data/festivals.ranking.') },
+    ];
+  }
+
   get privateFilters() {
     return [
       { exclude: (key) => key.startsWith('archive/') },
diff --git a/app/sync/index.mjs b/app/sync/index.mjs
index ddb7dea..5e4b3da 100644
--- a/app/sync/index.mjs
+++ b/app/sync/index.mjs
@@ -1,4 +1,5 @@
 import S3Syncer from './S3Syncer.mjs';
+import vfs from '../common/vfs.mjs';
 
 export function canSync() {
   return !!(
@@ -18,6 +19,9 @@ async function doSync(download, upload) {
   const syncer = new S3Syncer();
 
   if (download) {
+    // Load VFS listing for prefixes that won't be downloaded
+    await vfs.loadFromS3(['assets/splatnet/', 'data/']);
+
     console.info('Downloading files...');
     await syncer.download();
   }