Skip to content

Commit

Permalink
feat: Have ExtensionBasedMapper handle extensions correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimvh committed Oct 7, 2020
1 parent 0644f8d commit b47dc3f
Show file tree
Hide file tree
Showing 5 changed files with 382 additions and 114 deletions.
176 changes: 136 additions & 40 deletions src/storage/ExtensionBasedMapper.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import { promises as fsPromises } from 'fs';
import { posix } from 'path';
import { types } from 'mime-types';
import * as mime from 'mime-types';
import type { ResourceIdentifier } from '../ldp/representation/ResourceIdentifier';
import { APPLICATION_OCTET_STREAM, TEXT_TURTLE } from '../util/ContentTypes';
import { ConflictHttpError } from '../util/errors/ConflictHttpError';
import { NotFoundHttpError } from '../util/errors/NotFoundHttpError';
import { UnsupportedHttpError } from '../util/errors/UnsupportedHttpError';
import { trimTrailingSlashes } from '../util/Util';
import type { FileIdentifierMapper } from './FileIdentifierMapper';
import type { FileIdentifierMapper, ResourceLink } from './FileIdentifierMapper';

const { join: joinPath, normalize: normalizePath } = posix;

Expand All @@ -22,52 +24,136 @@ export interface ResourcePath {
documentName?: string;
}

/**
* A mapper that stores the content-type of resources in the file path extension.
* In case the extension of the identifier does not correspond to the correct content-type,
* a new extension will be appended (with a `$` in front of it).
* E.g. if the path is `input.ttl` with content-type `text/plain`, the path would actually be `input.ttl$.txt`.
* This new extension is stripped again when generating an identifier.
*/
export class ExtensionBasedMapper implements FileIdentifierMapper {
private readonly base: string;
private readonly prootFilepath: string;
private readonly baseRequestURI: string;
private readonly rootFilepath: string;
private readonly types: Record<string, any>;

public constructor(base: string, rootFilepath: string, overrideTypes = { acl: TEXT_TURTLE, metadata: TEXT_TURTLE }) {
this.base = base;
this.prootFilepath = rootFilepath;
this.types = { ...types, ...overrideTypes };
}

public get baseRequestURI(): string {
return trimTrailingSlashes(this.base);
}

public get rootFilepath(): string {
return trimTrailingSlashes(this.prootFilepath);
this.baseRequestURI = trimTrailingSlashes(base);
this.rootFilepath = trimTrailingSlashes(normalizePath(rootFilepath));
this.types = { ...mime.types, ...overrideTypes };
}

/**
* Strips the baseRequestURI from the identifier and checks if the stripped base URI matches the store's one.
* @param identifier - Incoming identifier.
* Maps the given resource identifier / URL to a file path.
* Determines the content-type if no content-type was provided.
* For containers the content-type input gets ignored.
* @param identifier - The input identifier.
* @param contentType - The (optional) content-type of the resource.
*
* @throws {@link NotFoundHttpError}
* If the identifier does not match the baseRequestURI path of the store.
*
* @returns Absolute path of the file.
* @returns A ResourceLink with all the necessary metadata.
*/
public mapUrlToFilePath(identifier: ResourceIdentifier, id = ''): string {
return this.getAbsolutePath(this.getRelativePath(identifier), id);
public async mapUrlToFilePath(identifier: ResourceIdentifier, contentType?: string): Promise<ResourceLink> {
  // Overview: validate the part of the URL after the base, resolve it against the root file path, and then
  //  1. return containers as-is (the content-type is ignored for them),
  //  2. look up the stored file on disk when no content-type is given, or
  //  3. derive the file name that encodes the given content-type.
  let path = this.getRelativePath(identifier);

  if (!path.startsWith('/')) {
    throw new UnsupportedHttpError('URL needs a / after the base.');
  }

  // Reject anything that could climb out of the root folder once the path gets joined/normalized.
  if (path.includes('/..')) {
    throw new UnsupportedHttpError('Disallowed /.. segment in URL.');
  }

  path = this.getAbsolutePath(path);

  // Container
  if (identifier.path.endsWith('/')) {
    return {
      identifier,
      filePath: path,
    };
  }

  // Would conflict with how new extensions get stored
  if (/\$\.\w+$/u.test(path)) {
    throw new UnsupportedHttpError('Identifiers cannot contain a dollar sign before their extension.');
  }

  // Existing file
  if (!contentType) {
    // Split on the last slash rather than matching with a regex: `.` does not match line terminators,
    // so a path containing a (decoded) newline would make the match fail and crash with a TypeError
    // instead of producing a proper HTTP error.
    // If there is no slash at all, `folder` is empty and the directory read below fails, yielding a 404.
    const lastSlash = path.lastIndexOf('/');
    const folder = path.slice(0, lastSlash + 1);
    const documentName = path.slice(lastSlash + 1);

    let fileName: string | undefined;
    try {
      const files = await fsPromises.readdir(folder);
      // Accept either the exact document name or the document name followed by a `$.<extension>` suffix.
      fileName = files.find(
        (file): boolean =>
          file.startsWith(documentName) && /^(?:\$\..+)?$/u.test(file.slice(documentName.length)),
      );
    } catch {
      // Parent folder does not exist (or is not a folder)
      throw new NotFoundHttpError();
    }

    // File doesn't exist
    if (!fileName) {
      throw new NotFoundHttpError();
    }

    return {
      identifier,
      filePath: joinPath(folder, fileName),
      contentType: this.getContentTypeFromExtension(fileName),
    };
  }

  // If the extension of the identifier matches a different content-type than the one that is given,
  // we need to add a new extension to match the correct type.
  if (contentType !== this.getContentTypeFromExtension(path)) {
    const extension = mime.extension(contentType);
    if (!extension) {
      throw new UnsupportedHttpError(`Unsupported content-type ${contentType}.`);
    }
    path = `${path}$.${extension}`;
  }

  return {
    identifier,
    filePath: path,
    contentType,
  };
}

/**
* Strips the rootFilepath path from the filepath and adds the baseRequestURI in front of it.
* @param path - The file path.
* Maps the given file path to a URL and determines the content-type.
* @param filePath - The input file path.
* @param isContainer - Whether the path corresponds to a container.
*
* @throws {@link Error}
* If the file path does not match the rootFilepath path of the store.
*
* @returns Url of the file.
* @returns A ResourceLink with all the necessary metadata.
*/
public mapFilePathToUrl(path: string): string {
if (!path.startsWith(this.rootFilepath)) {
throw new Error(`File ${path} is not part of the file storage at ${this.rootFilepath}.`);
public async mapFilePathToUrl(filePath: string, isContainer: boolean): Promise<ResourceLink> {
if (!filePath.startsWith(this.rootFilepath)) {
throw new Error(`File ${filePath} is not part of the file storage at ${this.rootFilepath}.`);
}
return this.baseRequestURI + path.slice(this.rootFilepath.length);

let relative = filePath.slice(this.rootFilepath.length);
if (isContainer) {
return {
identifier: { path: encodeURI(this.baseRequestURI + relative) },
filePath,
};
}

// Files
const extension = this.getExtension(relative);
const contentType = this.getContentTypeFromExtension(relative);
if (extension && relative.endsWith(`$.${extension}`)) {
relative = relative.slice(0, -(extension.length + 2));
}

return {
identifier: { path: encodeURI(this.baseRequestURI + relative) },
filePath,
contentType,
};
}

/**
Expand All @@ -76,9 +162,19 @@ export class ExtensionBasedMapper implements FileIdentifierMapper {
*
* @returns Content type of the file.
*/
public getContentTypeFromExtension(path: string): string {
private getContentTypeFromExtension(path: string): string {
const extension = this.getExtension(path);
return (extension && this.types[extension.toLowerCase()]) || APPLICATION_OCTET_STREAM;
}

/**
* Extracts the extension (without dot) from a path.
* Custom function since `path.extname` does not work in all cases (e.g. ".acl")
* @param path - Input path to parse.
*/
private getExtension(path: string): string | null {
const extension = /\.([^./]+)$/u.exec(path);
return (extension && this.types[extension[1].toLowerCase()]) || APPLICATION_OCTET_STREAM;
return extension && extension[1];
}

/**
Expand All @@ -88,7 +184,7 @@ export class ExtensionBasedMapper implements FileIdentifierMapper {
*
* @returns Absolute path of the file.
*/
public getAbsolutePath(path: string, identifier = ''): string {
private getAbsolutePath(path: string, identifier = ''): string {
return joinPath(this.rootFilepath, path, identifier);
}

Expand All @@ -105,7 +201,7 @@ export class ExtensionBasedMapper implements FileIdentifierMapper {
if (!identifier.path.startsWith(this.baseRequestURI)) {
throw new NotFoundHttpError();
}
return identifier.path.slice(this.baseRequestURI.length);
return decodeURI(identifier.path).slice(this.baseRequestURI.length);
}

/**
Expand All @@ -116,7 +212,7 @@ export class ExtensionBasedMapper implements FileIdentifierMapper {
* @throws {@link ConflictHttpError}
* If the root identifier is passed.
*
* @returns A ResourcePath object containing path and (optional) slug fields.
* @returns A ResourcePath object containing (absolute) path and (optional) slug fields.
*/
public extractDocumentName(identifier: ResourceIdentifier): ResourcePath {
const [ , containerPath, documentName ] = /^(.*\/)([^/]+\/?)?$/u.exec(this.getRelativePath(identifier)) ?? [];
Expand All @@ -125,9 +221,9 @@ export class ExtensionBasedMapper implements FileIdentifierMapper {
throw new ConflictHttpError('Container with that identifier already exists (root).');
}
return {
containerPath: normalizePath(containerPath),
containerPath: this.getAbsolutePath(normalizePath(containerPath)),

// If documentName is not undefined, return normalized documentName
// If documentName is defined, return normalized documentName
documentName: typeof documentName === 'string' ? normalizePath(documentName) : undefined,
};
}
Expand Down
33 changes: 26 additions & 7 deletions src/storage/FileIdentifierMapper.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,40 @@
import type { ResourceIdentifier } from '../ldp/representation/ResourceIdentifier';

export interface ResourceLink {
/**
* Identifier of the resource this link describes.
*/
identifier: ResourceIdentifier;
/**
* Path of the file or folder that corresponds to the resource.
*/
filePath: string;
/**
* Content-type of a data resource.
* Left undefined for containers.
*/
contentType?: string;
}

/**
* Supports mapping a file to an URL and back.
*/
export interface FileIdentifierMapper {
/**
* Maps the given file path to an URL.
* @param file - The input file path.
* Maps the given file path to a URL and determines the content-type.
* @param filePath - The input file path.
* @param isContainer - Whether the path corresponds to a container.
*
* @returns The URL as a string.
* @returns A ResourceLink with all the necessary metadata.
*/
mapFilePathToUrl: (filePath: string) => string;
mapFilePathToUrl: (filePath: string, isContainer: boolean) => Promise<ResourceLink>;
/**
* Maps the given resource identifier / URL to a file path.
* @param url - The input URL.
* Determines the content-type if no content-type was provided.
* For containers the content-type input gets ignored.
* @param identifier - The input identifier.
* @param contentType - The (optional) content-type of the resource.
*
* @returns The file path as a string.
* @returns A ResourceLink with all the necessary metadata.
*/
mapUrlToFilePath: (identifier: ResourceIdentifier) => string;
mapUrlToFilePath: (identifier: ResourceIdentifier, contentType?: string) => Promise<ResourceLink>;
}
Loading

0 comments on commit b47dc3f

Please sign in to comment.