Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix file cache concurrency #420

Merged
merged 11 commits into from
Dec 18, 2021
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Fixed
- Fixes a race condition in the file cache service (see #420)

## [0.1.0]
### Added
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@
"new-cap": "off",
"@typescript-eslint/no-unused-vars": "off",
"@typescript-eslint/no-unused-vars-experimental": "error",
"@typescript-eslint/prefer-readonly-parameter-types": "off"
"@typescript-eslint/prefer-readonly-parameter-types": "off",
"@typescript-eslint/no-implicit-any-catch": "off"
}
},
"husky": {
Expand Down
124 changes: 113 additions & 11 deletions src/services/file-cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ import sequelize from 'sequelize';
import {FileCache} from '../models/index.js';
import {TYPES} from '../types.js';
import Config from './config.js';
import PQueue from 'p-queue';
import debug from '../utils/debug.js';

@injectable()
export default class FileCacheProvider {
private static readonly evictionQueue = new PQueue({concurrency: 1});
private readonly config: Config;

constructor(@inject(TYPES.Config) config: Config) {
Expand Down Expand Up @@ -58,10 +61,14 @@ export default class FileCacheProvider {
const stats = await fs.stat(tmpPath);

if (stats.size !== 0) {
await fs.rename(tmpPath, finalPath);
}
try {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, do we need to be catching and silently continuing here? I don't think this would throw because of race conditions anymore.

await fs.rename(tmpPath, finalPath);

await FileCache.create({hash, bytes: stats.size, accessedAt: new Date()});
await FileCache.create({hash, bytes: stats.size, accessedAt: new Date()});
} catch (error) {
debug('Errored when moving a finished cache file:', error);
}
}

await this.evictOldestIfNecessary();
});
Expand All @@ -80,13 +87,19 @@ export default class FileCacheProvider {
}

private async evictOldestIfNecessary() {
const [{dataValues: {totalSizeBytes}}] = await FileCache.findAll({
attributes: [
[sequelize.fn('sum', sequelize.col('bytes')), 'totalSizeBytes'],
],
}) as unknown as [{dataValues: {totalSizeBytes: number}}];
void FileCacheProvider.evictionQueue.add(this.evictOldest.bind(this));

return FileCacheProvider.evictionQueue.onEmpty();
}

if (totalSizeBytes > this.config.CACHE_LIMIT_IN_BYTES) {
private async evictOldest() {
debug('Evicting oldest files...');

let totalSizeBytes = await this.getDiskUsageInBytes();
let numOfEvictedFiles = 0;
// Continue to evict until we're under the limit
/* eslint-disable no-await-in-loop */
while (totalSizeBytes > this.config.CACHE_LIMIT_IN_BYTES) {
const oldest = await FileCache.findOne({
order: [
['accessedAt', 'ASC'],
Expand All @@ -96,22 +109,111 @@ export default class FileCacheProvider {
if (oldest) {
await oldest.destroy();
await fs.unlink(path.join(this.config.CACHE_DIR, oldest.hash));
debug(`${oldest.hash} has been evicted`);
numOfEvictedFiles++;
}

// Continue to evict until we're under the limit
await this.evictOldestIfNecessary();
totalSizeBytes = await this.getDiskUsageInBytes();
}
/* eslint-enable no-await-in-loop */

if (numOfEvictedFiles > 0) {
debug(`${numOfEvictedFiles} files have been evicted`);
} else {
debug(`No files needed to be evicted. Total size of the cache is currently ${totalSizeBytes} bytes, and the cache limit is ${this.config.CACHE_LIMIT_IN_BYTES} bytes.`);
}
}

private async removeOrphans() {
// Check filesystem direction (do files exist on the disk but not in the database?)
for await (const dirent of await fs.opendir(this.config.CACHE_DIR)) {
if (dirent.isFile()) {
const model = await FileCache.findByPk(dirent.name);

if (!model) {
debug(`${dirent.name} was present on disk but was not in the database. Removing from disk.`);
await fs.unlink(path.join(this.config.CACHE_DIR, dirent.name));
}
}
}

// Check database direction (do entries exist in the database but not on the disk?)
for await (const model of this.getFindAllIterable()) {
const filePath = path.join(this.config.CACHE_DIR, model.hash);

try {
await fs.access(filePath);
} catch {
debug(`${model.hash} was present in database but was not on disk. Removing from database.`);
await model.destroy();
}
}
}

/**
* Pulls from the database rather than the filesystem,
* so may be slightly inaccurate.
* @returns the total size of the cache in bytes
*/
private async getDiskUsageInBytes() {
const [{dataValues: {totalSizeBytes}}] = await FileCache.findAll({
attributes: [
[sequelize.fn('sum', sequelize.col('bytes')), 'totalSizeBytes'],
],
}) as unknown as [{dataValues: {totalSizeBytes: number}}];

return totalSizeBytes;
}

/**
* An efficient way to iterate over all rows.
* @returns an iterable for the result of FileCache.findAll()
*/
private getFindAllIterable() {
const limit = 50;
let previousCreatedAt: Date | null = null;

let models: FileCache[] = [];

const fetchNextBatch = async () => {
let where = {};

if (previousCreatedAt) {
where = {
createdAt: {
[sequelize.Op.gt]: previousCreatedAt,
},
};
}

models = await FileCache.findAll({
where,
limit,
order: ['createdAt'],
});

if (models.length > 0) {
previousCreatedAt = models[models.length - 1].createdAt as Date;
}
};

return {
[Symbol.asyncIterator]() {
return {
async next() {
if (models.length === 0) {
await fetchNextBatch();
}

if (models.length === 0) {
// Must return value here for types to be inferred correctly
return {done: true, value: null as unknown as FileCache};
}

return {value: models.shift()!, done: false};
},
};
},
};
}
}