Skip to content

Commit

Permalink
feat: added simple parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
aryanjassal committed Feb 19, 2025
1 parent 09ab1c7 commit a36bb9c
Show file tree
Hide file tree
Showing 7 changed files with 283 additions and 23 deletions.
26 changes: 13 additions & 13 deletions src/Generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import * as errors from './errors';
import * as utils from './utils';
import * as constants from './constants';

// Computes the checksum by adding the value of every single byte in the header
// Computes the checksum by summing up all the bytes in the header
function computeChecksum(header: Buffer): number {
if (!header.subarray(148, 156).every((byte) => byte === 32)) {
throw new errors.ErrorVirtualTarInvalidHeader(
Expand Down Expand Up @@ -80,7 +80,7 @@ function createHeader(
utils.splitFileName(filePath, 0, HeaderSize.FILE_NAME),
HeaderOffset.FILE_NAME,
HeaderSize.FILE_NAME,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// The file permissions, or the mode, is stored in the next chunk. This is
Expand All @@ -89,23 +89,23 @@ function createHeader(
utils.pad(stat.mode ?? '', HeaderSize.FILE_MODE, '0', '\0'),
HeaderOffset.FILE_MODE,
HeaderSize.FILE_MODE,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// The owner UID is stored in this chunk
header.write(
utils.pad(stat.uid ?? '', HeaderSize.OWNER_UID, '0', '\0'),
HeaderOffset.OWNER_UID,
HeaderSize.OWNER_UID,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// The owner GID is stored in this chunk
header.write(
utils.pad(stat.gid ?? '', HeaderSize.OWNER_GID, '0', '\0'),
HeaderOffset.OWNER_GID,
HeaderSize.OWNER_GID,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// The file size is stored in this chunk. The file size must be zero for
Expand All @@ -114,7 +114,7 @@ function createHeader(
utils.pad(size ?? '', HeaderSize.FILE_SIZE, '0', '\0'),
HeaderOffset.FILE_SIZE,
HeaderSize.FILE_SIZE,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// The file mtime is stored in this chunk. As the mtime is not modified when
Expand All @@ -124,7 +124,7 @@ function createHeader(
utils.pad(time, HeaderSize.FILE_MTIME, '0', '\0'),
HeaderOffset.FILE_MTIME,
HeaderSize.FILE_MTIME,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// The checksum is calculated as the sum of all bytes in the header. It is
Expand All @@ -133,15 +133,15 @@ function createHeader(
utils.pad('', HeaderSize.CHECKSUM, ' '),
HeaderOffset.CHECKSUM,
HeaderSize.CHECKSUM,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// The type of file is written as a single byte in the header.
header.write(
type,
HeaderOffset.TYPE_FLAG,
HeaderSize.TYPE_FLAG,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// File owner name will be null, as regular stat-ing cannot extract that
Expand All @@ -153,15 +153,15 @@ function createHeader(
constants.USTAR_NAME,
HeaderOffset.USTAR_NAME,
HeaderSize.USTAR_NAME,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// This chunk stores the version of USTAR, which is '00' in this case.
header.write(
constants.USTAR_VERSION,
HeaderOffset.USTAR_VERSION,
HeaderSize.USTAR_VERSION,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// Owner user name will be null, as regular stat-ing cannot extract this
Expand All @@ -186,7 +186,7 @@ function createHeader(
),
HeaderOffset.FILE_NAME_EXTRA,
HeaderSize.FILE_NAME_EXTRA,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

// Updating with the new checksum
Expand All @@ -199,7 +199,7 @@ function createHeader(
utils.pad(checksum, HeaderSize.CHECKSUM, '0', '\0 '),
HeaderOffset.CHECKSUM,
HeaderSize.CHECKSUM,
constants.HEADER_ENCODING,
constants.TEXT_ENCODING,
);

return header;
Expand Down
122 changes: 122 additions & 0 deletions src/Parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import type { ParserState, Header, Data, End } from './types';
import { HeaderOffset, HeaderSize, EntryType } from './types';
import * as constants from './constants';
import * as errors from './errors';
import * as utils from './utils';

/**
 * Block-by-block tar archive parser.
 *
 * Feed the archive to `write` one `constants.BLOCK_SIZE`-byte block at a time.
 * Each call returns a token describing what the block was: a parsed header,
 * a chunk of file data, or the end-of-archive marker.
 */
class Parser {
  /**
   * What the next block is expected to be:
   * - 'ready': a header block (or the first all-zero end marker block),
   * - 'header': file data belonging to the most recently parsed file header,
   * - 'null': the second all-zero block of the end-of-archive marker.
   */
  protected state: ParserState = 'ready';
  /** Number of file-content bytes still to be read for the current file. */
  protected remainingBytes = 0;

  /**
   * Consumes a single block of the archive.
   *
   * @param data - Exactly `constants.BLOCK_SIZE` bytes of archive data.
   * @returns A `Header` token for a header block, a `Data` token for file
   * content (trimmed to the file's remaining size on the last block), an `End`
   * token once a second consecutive all-zero block is seen, or `undefined` for
   * the first all-zero block. A zero-byte file produces only its `Header`
   * token; no `Data` token follows it.
   * @throws {errors.ErrorVirtualTarBlockSize} If `data` is not exactly one
   * block long.
   * @throws {errors.ErrorVirtualTarEndOfArchive} If a non-zero block follows an
   * all-zero block.
   */
  write(data: ArrayBuffer): Header | Data | End | undefined {
    if (data.byteLength !== constants.BLOCK_SIZE) {
      throw new errors.ErrorVirtualTarBlockSize(
        `Expected block size ${constants.BLOCK_SIZE} but received ${data.byteLength}`,
      );
    }

    // TODO: test if the first block is header by checking magic value
    const view = new DataView(data, 0, constants.BLOCK_SIZE);

    switch (this.state) {
      case 'ready': {
        // An all-zero block where a header is expected is the first half of
        // the end-of-archive marker.
        if (utils.checkNullView(view)) {
          this.state = 'null';
          return;
        }

        const fileName = utils.parseFileName(view);
        const fileSize = utils.extractOctal(
          view,
          HeaderOffset.FILE_SIZE,
          HeaderSize.FILE_SIZE,
        );
        // The header stores the mtime as Unix time in seconds (see
        // utils.dateToUnixTime), whereas Date expects milliseconds.
        const fileMtime = new Date(
          utils.extractOctal(
            view,
            HeaderOffset.FILE_MTIME,
            HeaderSize.FILE_MTIME,
          ) * 1000,
        );
        const fileMode = utils.extractOctal(
          view,
          HeaderOffset.FILE_MODE,
          HeaderSize.FILE_MODE,
        );
        const ownerGid = utils.extractOctal(
          view,
          HeaderOffset.OWNER_GID,
          HeaderSize.OWNER_GID,
        );
        const ownerUid = utils.extractOctal(
          view,
          HeaderOffset.OWNER_UID,
          HeaderSize.OWNER_UID,
        );
        const ownerName = utils.extractChars(
          view,
          HeaderOffset.OWNER_NAME,
          HeaderSize.OWNER_NAME,
        );
        const ownerGroupName = utils.extractChars(
          view,
          HeaderOffset.OWNER_GROUPNAME,
          HeaderSize.OWNER_GROUPNAME,
        );
        const ownerUserName = utils.extractChars(
          view,
          HeaderOffset.OWNER_USERNAME,
          HeaderSize.OWNER_USERNAME,
        );
        // Any type flag other than EntryType.FILE is reported as a directory.
        const fileType =
          utils.extractChars(
            view,
            HeaderOffset.TYPE_FLAG,
            HeaderSize.TYPE_FLAG,
          ) === EntryType.FILE
            ? 'file'
            : 'directory';

        // Only expect data blocks when the file actually has content. An
        // empty file is followed directly by the next header (or the end
        // marker), which must not be swallowed as a zero-length data chunk.
        if (fileType === 'file' && fileSize > 0) {
          this.state = 'header';
          this.remainingBytes = fileSize;
        }

        const parsedHeader: Header = {
          type: 'header',
          fileType,
          fileName,
          fileMode,
          fileMtime,
          fileSize,
          ownerGid,
          ownerUid,
          ownerName,
          ownerUserName,
          ownerGroupName,
        };

        return parsedHeader;
      }
      case 'header':
        if (this.remainingBytes > constants.BLOCK_SIZE) {
          // A full block of content; more blocks follow for this file.
          this.remainingBytes -= constants.BLOCK_SIZE;
          return { type: 'data', data: utils.extractBytes(view) };
        } else {
          // Final block of this file: drop the padding after the content.
          const data = utils.extractBytes(view, 0, this.remainingBytes);
          this.remainingBytes = 0;
          this.state = 'ready';
          return { type: 'data', data: data };
        }

      case 'null':
        // The end-of-archive marker is two consecutive all-zero blocks.
        if (utils.checkNullView(view)) return { type: 'end' };
        else throw new errors.ErrorVirtualTarEndOfArchive();

      default:
        utils.never('Unexpected state');
    }
  }
}

export default Parser;
2 changes: 1 addition & 1 deletion src/constants.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export const BLOCK_SIZE = 512;
export const USTAR_NAME = 'ustar\0';
export const USTAR_VERSION = '00';
export const HEADER_ENCODING = 'ascii';
export const TEXT_ENCODING = 'ascii';
10 changes: 10 additions & 0 deletions src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,20 @@ class ErrorVirtualTarInvalidStat<T> extends ErrorVirtualTar<T> {
static description = 'The stat contains invalid data';
}

/**
 * Raised when a block handed to the parser is not exactly one tar block
 * (`BLOCK_SIZE` bytes) long.
 */
class ErrorVirtualTarBlockSize<T> extends ErrorVirtualTar<T> {
static description = 'The block size is incorrect';
}

/**
 * Raised when the parser has seen an all-zero block and the block that follows
 * it is not all-zero as well.
 */
class ErrorVirtualTarEndOfArchive<T> extends ErrorVirtualTar<T> {
static description = 'No data can come after an end-of-archive marker';
}

export {
ErrorVirtualTar,
ErrorVirtualTarUndefinedBehaviour,
ErrorVirtualTarInvalidFileName,
ErrorVirtualTarInvalidHeader,
ErrorVirtualTarInvalidStat,
ErrorVirtualTarBlockSize,
ErrorVirtualTarEndOfArchive,
};
27 changes: 26 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,31 @@ type FileStat = {
mtime?: Date;
};

export type { FileStat };
/**
 * States of the block parser:
 * - 'ready': the next block is read as a header (or an all-zero block),
 * - 'header': a file header was parsed and the next block is read as its data,
 * - 'null': an all-zero block was seen and another one is expected.
 */
type ParserState = 'ready' | 'header' | 'null';

/** Token returned by the parser for a header block. */
type Header = {
// Discriminant identifying this token as a header.
type: 'header';
// Kind of entry the header describes.
fileType: 'file' | 'directory';
// File name assembled from the name field and the extra name field.
fileName: string;
// Numeric value of the octal mode field.
fileMode: number;
// Numeric value of the octal owner UID field.
ownerUid: number;
// Numeric value of the octal owner GID field.
ownerGid: number;
// Numeric value of the octal size field.
fileSize: number;
// Modification time built from the octal mtime field.
fileMtime: Date;
// Text of the owner name field with NUL characters removed.
ownerName: string;
// Text of the owner user name field with NUL characters removed.
ownerUserName: string;
// Text of the owner group name field with NUL characters removed.
ownerGroupName: string;
};

/** Token returned by the parser for a block of file content. */
type Data = {
// Discriminant identifying this token as file data.
type: 'data';
// Content bytes taken from the block; the last chunk of a file is shorter
// than a block when the file size is not a multiple of the block size.
data: Uint8Array;
};

/** Token returned by the parser once the end-of-archive marker is read. */
type End = {
// Discriminant identifying this token as the end of the archive.
type: 'end';
};

export type { FileStat, ParserState, Header, Data, End };

export { EntryType, HeaderOffset, HeaderSize };
68 changes: 67 additions & 1 deletion src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import { HeaderOffset, HeaderSize } from './types';
import * as errors from './errors';
import * as constants from './constants';

// Matches every NUL character; used to strip NUL padding/terminators from
// decoded header fields.
const nullRegex = /\0/g;

function never(message: string): never {
throw new errors.ErrorVirtualTarUndefinedBehaviour(message);
Expand Down Expand Up @@ -30,4 +34,66 @@ function dateToUnixTime(date: Date): number {
return Math.round(date.getTime() / 1000);
}

export { never, pad, splitFileName, dateToUnixTime };
// PARSER

// Shared decoder used to turn raw header bytes into text.
// NOTE(review): the WHATWG Encoding Standard treats the 'ascii' label as an
// alias of windows-1252, so bytes >= 0x80 are decoded rather than rejected —
// confirm this is acceptable for header fields.
const decoder = new TextDecoder(constants.TEXT_ENCODING);

/**
 * Returns the bytes of a region of `view` as a `Uint8Array`.
 *
 * The result shares memory with the view's underlying buffer; no copy is made.
 *
 * @param view - The view to read from.
 * @param offset - Start of the region, relative to the start of the view.
 * Defaults to 0.
 * @param length - Number of bytes in the region. Defaults to everything from
 * `offset` to the end of the view.
 * @returns The requested bytes.
 * @throws {RangeError} If the region does not fit inside the underlying buffer.
 */
function extractBytes(
  view: DataView,
  offset?: number,
  length?: number,
): Uint8Array {
  const start = offset ?? 0;
  // Stop at the end of the view, not at the end of the whole buffer.
  const size = length ?? view.byteLength - start;
  // The view may begin part-way into its buffer, so the offset has to be
  // translated into buffer coordinates.
  return new Uint8Array(view.buffer, view.byteOffset + start, size);
}

/**
 * Decodes a region of `view` into text and removes every NUL character from
 * the decoded string (not only trailing ones).
 *
 * @param view - The view to read from.
 * @param offset - Start of the region; forwarded to `extractBytes`.
 * @param length - Size of the region in bytes; forwarded to `extractBytes`.
 * @returns The decoded text without NUL characters.
 */
function extractChars(
  view: DataView,
  offset?: number,
  length?: number,
): string {
  const rawBytes = extractBytes(view, offset, length);
  const decodedText = decoder.decode(rawBytes);
  return decodedText.replace(nullRegex, '');
}

/**
 * Reads a region of `view` as text and interprets it as a base-8 number.
 *
 * @param view - The view to read from.
 * @param offset - Start of the region; forwarded to `extractChars`.
 * @param length - Size of the region in bytes; forwarded to `extractChars`.
 * @returns The parsed number, or 0 when the region holds no characters once
 * NULs are removed. Text that `parseInt` cannot parse yields `NaN`.
 */
function extractOctal(
  view: DataView,
  offset?: number,
  length?: number,
): number {
  const digits = extractChars(view, offset, length);
  if (digits.length === 0) {
    return 0;
  }
  return parseInt(digits, 8);
}

/**
 * Rebuilds the file name stored in a header block.
 *
 * The name is the text of the FILE_NAME field immediately followed by the
 * text of the FILE_NAME_EXTRA field, with no separator in between.
 *
 * @param view - View over the header block.
 * @returns The concatenated file name.
 */
function parseFileName(view: DataView) {
  const primaryPart = extractChars(
    view,
    HeaderOffset.FILE_NAME,
    HeaderSize.FILE_NAME,
  );
  const overflowPart = extractChars(
    view,
    HeaderOffset.FILE_NAME_EXTRA,
    HeaderSize.FILE_NAME_EXTRA,
  );
  return `${primaryPart}${overflowPart}`;
}

/**
 * Tells whether the first `constants.BLOCK_SIZE` bytes of `view` are all zero.
 *
 * @param view - View to inspect; it must span at least one block, otherwise
 * `getUint8` throws a `RangeError`.
 * @returns `true` when every inspected byte is 0, `false` as soon as a
 * non-zero byte is found.
 */
function checkNullView(view: DataView): boolean {
  let position = 0;
  while (position < constants.BLOCK_SIZE) {
    if (view.getUint8(position) !== 0) {
      return false;
    }
    position += 1;
  }
  return true;
}

export {
never,
pad,
splitFileName,
dateToUnixTime,
extractBytes,
extractChars,
extractOctal,
parseFileName,
checkNullView,
};
Loading

0 comments on commit a36bb9c

Please sign in to comment.