Add VFS layer to avoid downloading files only needed for existence checks

Uses S3 ListObjectsV2 to build an in-memory file listing at startup,
allowing exists() and olderThan() to resolve from the listing instead
of requiring files on disk. sync:download now skips assets/splatnet/,
data/xrank/, data/festivals.ranking.*, and status-screenshots/.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt Isenhower 2026-02-22 09:26:05 -08:00
parent 6b686fb7f4
commit 41c9f9a315
6 changed files with 180 additions and 3 deletions

View File

@ -1,10 +1,16 @@
import fs from 'fs/promises';
import vfs from './vfs.mjs';
/**
 * Create a directory, including any missing parent directories.
 *
 * @param {string} dir - Path of the directory to create.
 * @returns {Promise<string|undefined>} The promise returned by `fs.mkdir`
 *   when called in recursive mode.
 */
export function mkdirp(dir) {
  const options = { recursive: true };

  return fs.mkdir(dir, options);
}
export async function exists(file) {
const vfsResult = vfs.has(file);
if (vfsResult !== null) {
return vfsResult;
}
try {
await fs.access(file);
@ -16,6 +22,11 @@ export async function exists(file) {
// Determine whether a file is older than a given cutoff date (or doesn't exist)
export async function olderThan(file, cutoff) {
const mtime = vfs.getMtime(file);
if (mtime !== null) {
return mtime < cutoff;
}
try {
let stat = await fs.stat(file);

142
app/common/vfs.mjs Normal file
View File

@ -0,0 +1,142 @@
import { S3Client, ListObjectsV2Command } from '@aws-sdk/client-s3';
/**
 * In-memory view of selected S3 prefixes, used to answer existence and
 * modification-time questions about local paths without touching the disk.
 *
 * Every query method returns `null` when the listing cannot answer (not loaded
 * yet, or the path is outside the tracked prefixes) so that callers can fall
 * back to a different source.
 */
class VirtualFileSystem {
  // Map of S3 key to { lastModified: Date, size: number }
  _listing = new Map();
  // Becomes true once loadFromS3() has completed successfully
  _loaded = false;
  // S3 key prefixes for which has()/getMtime() give an answer
  _trackedPrefixes = [];
  // Leading local path segment that is stripped to obtain an S3 key
  _localPrefix = 'dist';

  /**
   * Whether the environment provides everything needed to query S3
   * (access key, secret key and bucket name).
   * @returns {boolean}
   */
  get _canUseS3() {
    return !!(
      process.env.AWS_ACCESS_KEY_ID &&
      process.env.AWS_SECRET_ACCESS_KEY &&
      process.env.AWS_S3_BUCKET
    );
  }

  /**
   * S3 client, created on first access and cached on the instance.
   */
  get _s3Client() {
    return this.__s3Client ??= new S3Client({
      endpoint: process.env.AWS_S3_ENDPOINT,
      region: process.env.AWS_REGION,
      credentials: {
        accessKeyId: process.env.AWS_ACCESS_KEY_ID,
        secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
      },
    });
  }

  /**
   * Load S3 listing for the given prefixes.
   * Call once at startup before any exists/olderThan checks.
   *
   * Does nothing when the S3 environment variables are not set. The listing
   * and the tracked prefixes are only updated after every prefix has been
   * listed successfully, so a failed or still-running load never changes the
   * answers given by has()/getMtime().
   *
   * @param {string[]} prefixes
   * @returns {Promise<void>} Rejects with the S3 client's error if a listing
   *   request fails.
   */
  async loadFromS3(prefixes) {
    if (!this._canUseS3) {
      return;
    }

    const bucket = process.env.AWS_S3_BUCKET;
    // Collect into a scratch map; committing only at the end keeps the
    // published state consistent if a request throws part-way through.
    const pending = new Map();

    console.log('[VFS] Loading S3 listing...');

    for (const prefix of prefixes) {
      let continuationToken;
      let count = 0;

      do {
        const response = await this._s3Client.send(new ListObjectsV2Command({
          Bucket: bucket,
          Prefix: prefix,
          ContinuationToken: continuationToken,
        }));

        for (const obj of response.Contents ?? []) {
          pending.set(obj.Key, {
            lastModified: obj.LastModified,
            size: obj.Size,
          });
          count++;
        }

        // Keep paging only while S3 reports that more results remain.
        continuationToken = response.IsTruncated
          ? response.NextContinuationToken
          : undefined;
      } while (continuationToken);

      console.log(`[VFS] Loaded ${count} entries for prefix "${prefix}"`);
    }

    for (const [key, entry] of pending) {
      this._listing.set(key, entry);
    }
    // Copy so later mutation of the caller's array cannot alter tracking.
    this._trackedPrefixes = [...prefixes];
    this._loaded = true;
  }

  /**
   * Check if a local file path is known to exist in the VFS listing.
   * Returns true/false if the path is within a tracked prefix,
   * or null if VFS has no opinion (not loaded, or path outside tracked prefixes).
   * @param {string} localPath
   * @returns {boolean|null}
   */
  has(localPath) {
    const key = this._resolveTrackedKey(localPath);
    return key === null ? null : this._listing.has(key);
  }

  /**
   * Get the last modified time for a file from the S3 listing.
   * Returns Date if found, null if not tracked or VFS not loaded.
   * Also returns null when the entry exists but carries no modification time,
   * so callers comparing against null never receive undefined.
   * @param {string} localPath
   * @returns {Date|null}
   */
  getMtime(localPath) {
    const key = this._resolveTrackedKey(localPath);
    if (key === null) return null;

    return this._listing.get(key)?.lastModified ?? null;
  }

  /**
   * Track a file that was just written locally.
   * Ensures subsequent has() calls return true without hitting disk.
   * Has no effect before the listing is loaded or for paths outside the
   * local prefix.
   * @param {string} localPath
   */
  track(localPath) {
    if (!this._loaded) return;

    const key = this._localPathToKey(localPath);
    if (key === null) return;

    this._listing.set(key, {
      lastModified: new Date(),
      // The real size is not known here; 0 is stored as a placeholder.
      size: 0,
    });
  }

  /**
   * Map a local path to its S3 key, but only when the listing can answer for it.
   * @param {string} localPath
   * @returns {string|null} The key, or null when the listing is not loaded,
   *   the path is outside the local prefix, or the key is not under a tracked
   *   prefix.
   */
  _resolveTrackedKey(localPath) {
    if (!this._loaded) return null;

    const key = this._localPathToKey(localPath);
    if (key === null || !this._isTrackedKey(key)) return null;

    return key;
  }

  /**
   * Convert a local path (e.g. 'dist/assets/splatnet/foo.png')
   * to an S3 key (e.g. 'assets/splatnet/foo.png').
   * @param {string} localPath
   * @returns {string|null} The key, or null if the path does not start with
   *   the local prefix.
   */
  _localPathToKey(localPath) {
    const prefix = `${this._localPrefix}/`;
    return localPath.startsWith(prefix) ? localPath.slice(prefix.length) : null;
  }

  /**
   * Whether the key falls under one of the tracked prefixes.
   * @param {string} key
   * @returns {boolean}
   */
  _isTrackedKey(key) {
    return this._trackedPrefixes.some((prefix) => key.startsWith(prefix));
  }
}
// Single module-level instance, exported as the default; modules that import
// this file's default export all receive this same object.
const vfs = new VirtualFileSystem();
export default vfs;

View File

@ -4,6 +4,7 @@ import PQueue from 'p-queue';
import prefixedConsole from '../common/prefixedConsole.mjs';
import { normalizeSplatnetResourcePath } from '../common/util.mjs';
import { exists, mkdirp } from '../common/fs.mjs';
import vfs from '../common/vfs.mjs';
const queue = new PQueue({ concurrency: 4 });
@ -71,6 +72,7 @@ export default class ImageProcessor
await mkdirp(path.dirname(this.localPath(destination)));
await fs.writeFile(this.localPath(destination), result.body);
vfs.track(this.localPath(destination));
} catch (e) {
this.console.error(`Image download failed for ${destination}`, e);
}

View File

@ -268,8 +268,12 @@ export default class DataUpdater
const filename = images[event.imageUrl];
if (filename) {
const data = await fs.readFile(this.imageProcessor.localPath(filename));
imageData[event.imageUrl] = data;
try {
const data = await fs.readFile(this.imageProcessor.localPath(filename));
imageData[event.imageUrl] = data;
} catch {
// Image not available locally (may only exist in S3); skip inline embed
}
}
}

View File

@ -10,7 +10,7 @@ export default class S3Syncer
return Promise.all([
dist && this.syncClient.sync(this.publicBucket, `${this.localPath}/dist`, {
filters: this.filters,
filters: this.downloadFilters,
}),
storage && this.syncClient.sync(this.privateBucket, `${this.localPath}/storage`, {
filters: this.privateFilters,
@ -76,6 +76,20 @@ export default class S3Syncer
];
}
// Download filters skip files that are handled by the VFS layer
// (existence/mtime checks only, no content reads needed)
get downloadFilters() {
return [
{ exclude: () => true }, // Exclude everything by default
// assets/splatnet/ - handled by VFS (existence checks only)
// status-screenshots/ - regenerated by social posting
{ include: (key) => key.startsWith('data/') },
{ exclude: (key) => key.startsWith('data/archive/') },
{ exclude: (key) => key.startsWith('data/xrank/') },
{ exclude: (key) => key.startsWith('data/festivals.ranking.') },
];
}
get privateFilters() {
return [
{ exclude: (key) => key.startsWith('archive/') },

View File

@ -1,4 +1,5 @@
import S3Syncer from './S3Syncer.mjs';
import vfs from '../common/vfs.mjs';
export function canSync() {
return !!(
@ -18,6 +19,9 @@ async function doSync(download, upload) {
const syncer = new S3Syncer();
if (download) {
// Load VFS listing for prefixes that won't be downloaded
await vfs.loadFromS3(['assets/splatnet/', 'data/']);
console.info('Downloading files...');
await syncer.download();
}