Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Async EBML / Matroska parsing #2218

Merged
merged 2 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 73 additions & 31 deletions lib/ebml/EbmlIterator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,28 @@ import * as Token from 'token-types';

const debug = initDebug('music-metadata:parser:ebml');

interface ILinkedElementType extends IElementType {
export interface ILinkedElementType extends IElementType {
id: number;
parent: ILinkedElementType | undefined;
readonly container?: { [id: number]: ILinkedElementType; };
}

/**
 * Instruction returned by `IElementListener.startNext` telling the EBML iterator
 * what to do with the element whose header has just been read.
 * The numeric values are explicit; note that value 1 is not assigned.
 */
export enum ParseAction {
/**
 * Read the element: a container is parsed recursively, any other element is
 * decoded with the parser registered for its data type, and the result is
 * passed to `IElementListener.elementValue`.
 */
ReadNext = 0,
/** Do not read this element; its payload (`element.len` bytes) is skipped in the tokenizer. */
IgnoreElement = 2,
/** Skip everything remaining in the enclosing container (up to its end position). */
SkipSiblings = 3,
/**
 * Stop parsing and return the tree collected so far.
 * NOTE(review): in the visible `parseContainer` this returns from the current
 * container level only; confirm whether enclosing containers are expected to stop too.
 */
TerminateParsing = 4,
/**
 * Treat the element as already read: the iterator does not move the tokenizer
 * and assumes the position is already at the next element.
 */
SkipElement = 5
}

/**
 * Callbacks through which a consumer steers the EBML iterator and receives
 * element values while the document is being parsed.
 */
export type IElementListener = {
/**
 * Called for each element that matches the DTD, after its header has been read
 * and before its payload is processed.
 * @param dtdElement - DTD definition of the element that was matched
 * @returns the action the iterator should take for this element
 */
startNext: (dtdElement: ILinkedElementType) => ParseAction,
/**
 * Called after an element has been read; the iterator awaits the returned promise
 * before continuing.
 * @param dtdElement - DTD definition of the element that was read
 * @param value - decoded value, or the parsed sub-tree when the element is a container
 * @param offset - tokenizer position at which the element (its header) started
 */
elementValue: (dtdElement: ILinkedElementType, value: ValueType, offset: number) => Promise<void>
}

/**
* Extensible Binary Meta Language (EBML) iterator
* https://en.wikipedia.org/wiki/Extensible_Binary_Meta_Language
Expand Down Expand Up @@ -42,16 +59,17 @@ export class EbmlIterator {
this.parserMap.set(DataType.float, e => this.readFloat(e));
}

public async iterate(dtdElement: IElementType, posDone: number): Promise<ITree> {
return this.parseContainer(linkParents(dtdElement), posDone);
public async iterate(dtdElement: IElementType, posDone: number, listener: IElementListener): Promise<ITree> {
return this.parseContainer(linkParents(dtdElement), posDone, listener);
}

private async parseContainer(dtdElement: ILinkedElementType, posDone: number): Promise<ITree> {
private async parseContainer(dtdElement: ILinkedElementType, posDone: number, listener: IElementListener): Promise<ITree> {
const tree: ITree = {};
while (this.tokenizer.position < posDone) {
let element: IHeader;
const elementPosition= this.tokenizer.position;
try {
element = await this.readElement();
element = await this.readElement();
} catch (error) {
if (error instanceof EndOfStreamError) {
break;
Expand All @@ -60,32 +78,53 @@ export class EbmlIterator {
}
const child = (dtdElement.container as { [id: number]: ILinkedElementType; })[element.id];
if (child) {
if (child.ignore) {
debug(`Ignore element: name=${getElementPath(child)}, element.id=0x${element.id}, container=${!!child.container}`);
await this.tokenizer.ignore(element.len);
} else {
if (element.id === 0x1F43B675) {
// Hack to ignore remaining segment, when cluster element received
// await this.tokenizer.ignore(posDone - this.tokenizer.position);
// break;
}
debug(`Reading element: name=${getElementPath(child)}{id=0x${element.id}, container=${!!child.container}}`);
if (child.container) {
const res = await this.parseContainer(child, element.len >= 0 ? this.tokenizer.position + element.len : -1);
if (child.multiple) {
if (!tree[child.name]) {
tree[child.name] = [];
const action = listener.startNext(child);
switch (action) {
case ParseAction.ReadNext: {
if (element.id === 0x1F43B675) {
// Hack to ignore remaining segment, when cluster element received
// await this.tokenizer.ignore(posDone - this.tokenizer.position);
// break;
}
debug(`Read element: name=${getElementPath(child)}{id=0x${element.id.toString(16)}, container=${!!child.container}} at position=${elementPosition}`);
if (child.container) {
const res = await this.parseContainer(child, element.len >= 0 ? this.tokenizer.position + element.len : -1, listener);
if (child.multiple) {
if (!tree[child.name]) {
tree[child.name] = [];
}
(tree[child.name] as ITree[]).push(res);
} else {
tree[child.name] = res;
}
(tree[child.name] as ITree[]).push(res);
await listener.elementValue(child, res, elementPosition);
} else {
tree[child.name] = res;
}
} else {
const parser = this.parserMap.get(child.value as DataType);
if (typeof parser === 'function') {
tree[child.name] = await parser(element);
const parser = this.parserMap.get(child.value as DataType);
if (typeof parser === 'function') {
const value = await parser(element);
tree[child.name] = value;
await listener.elementValue(child, value, elementPosition);
}
}
}
} break;

case ParseAction.SkipElement:
debug(`Go to next element: name=${getElementPath(child)}, element.id=0x${element.id}, container=${!!child.container} at position=${elementPosition}`);
break;

case ParseAction.IgnoreElement:
debug(`Ignore element: name=${getElementPath(child)}, element.id=0x${element.id}, container=${!!child.container} at position=${elementPosition}`);
await this.tokenizer.ignore(element.len);
break;

case ParseAction.SkipSiblings:
debug(`Ignore remaining container, at: name=${getElementPath(child)}, element.id=0x${element.id}, container=${!!child.container} at position=${elementPosition}`);
await this.tokenizer.ignore(posDone - this.tokenizer.position);
break;

case ParseAction.TerminateParsing:
debug(`Terminate parsing at element: name=${getElementPath(child)}, element.id=0x${element.id}, container=${!!child.container} at position=${elementPosition}`);
return tree;
}
} else {
switch (element.id) {
Expand All @@ -94,7 +133,7 @@ export class EbmlIterator {
await this.tokenizer.ignore(element.len);
break;
default:
debug(`parseEbml: parent=${getElementPath(dtdElement)}, unknown child: id=${element.id.toString(16)}`);
debug(`parseEbml: parent=${getElementPath(dtdElement)}, unknown child: id=${element.id.toString(16)} at position=${elementPosition}`);
this.padding += element.len;
await this.tokenizer.ignore(element.len);
}
Expand Down Expand Up @@ -191,8 +230,11 @@ function readUIntBeAsBigInt(buf: Uint8Array, len: number): bigint {
function linkParents(element: IElementType): ILinkedElementType {
if (element.container) {
Object.keys(element.container)
.map(id => (element.container as { [id: string]: ILinkedElementType; })[id])
.forEach(child => {
.map(id => {
const child = (element.container as { [id: string]: ILinkedElementType; })[id];
child.id = Number.parseInt(id);
return child;
}).forEach(child => {
child.parent = element as ILinkedElementType;
linkParents(child);
});
Expand Down
1 change: 0 additions & 1 deletion lib/ebml/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ export interface IElementType {
readonly value?: DataType;
readonly container?: { [id: number]: IElementType; };
readonly multiple?: boolean;
readonly ignore?: boolean;
}

export interface IEbmlDoc {
Expand Down
12 changes: 6 additions & 6 deletions lib/matroska/MatroskaDtd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@ export const matroskaDtd: IElementType = {
name: 'segment',
container: {

// Meta Seek Information
// Meta Seek Information (also known as MetaSeek)
0x114d9b74: {
name: 'seekHead',
container: {
0x4dbb: {
name: 'seek',
multiple: true,
container: {
0x53ab: {name: 'seekId', value: DataType.binary},
0x53ac: {name: 'seekPosition', value: DataType.uint}
0x53ab: {name: 'id', value: DataType.binary},
0x53ac: {name: 'position', value: DataType.uint}
}
}
}
Expand Down Expand Up @@ -69,8 +70,8 @@ export const matroskaDtd: IElementType = {
0x58d7: {name: 'silentTracks ', multiple: true},
0xa7: {name: 'position', value: DataType.uid},
0xab: {name: 'prevSize', value: DataType.uid},
0xa0: {name: 'blockGroup', ignore: true},
0xa3: {name: 'simpleBlock', ignore: true}
0xa0: {name: 'blockGroup'},
0xa3: {name: 'simpleBlock'}
}
},

Expand Down Expand Up @@ -289,7 +290,6 @@ export const matroskaDtd: IElementType = {
}
}
}

}
}
}
Expand Down
165 changes: 95 additions & 70 deletions lib/matroska/MatroskaParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ import type { ITokenizer } from 'strtok3';
import type { INativeMetadataCollector } from '../common/MetadataCollector.js';
import { BasicParser } from '../common/BasicParser.js';
import { matroskaDtd } from './MatroskaDtd.js';
import { type IMatroskaDoc, type ITrackEntry, TargetType, TrackType } from './types.js';
import { type IAttachments, type ISegmentInformation, type ITags, type ITrackElement, type ITrackEntry, TargetType, TrackType } from './types.js';

import type { AnyTagValue, IOptions, ITrackInfo } from '../type.js';
import type { ITokenParser } from '../ParserFactory.js';
import { EbmlIterator } from '../ebml/EbmlIterator.js';
import { EbmlIterator, ParseAction } from '../ebml/EbmlIterator.js';

const debug = initDebug('music-metadata:parser:matroska');

Expand Down Expand Up @@ -38,78 +38,103 @@ export class MatroskaParser extends BasicParser {

const matroskaIterator = new EbmlIterator(this.tokenizer);
debug('Initializing DTD end MatroskaIterator');
const matroska = await matroskaIterator.iterate(matroskaDtd, containerSize) as unknown as IMatroskaDoc;

this.metadata.setFormat('container', `EBML/${matroska.ebml.docType}`);
if (matroska.segment) {

const info = matroska.segment.info;
if (info) {
const timecodeScale = info.timecodeScale ? info.timecodeScale :1000000;
if (typeof info.duration === 'number') {
const duration = info.duration * timecodeScale / 1000000000;
await this.addTag('segment:title', info.title);
this.metadata.setFormat('duration', Number(duration));
await matroskaIterator.iterate(matroskaDtd, containerSize, {
startNext: (element) => {
switch (element.id) {
// case 0x1f43b675: // cluster
case 0x1c53bb6b: // Cueing Data
debug(`Skip element: name=${element.name}, id=0x${element.id.toString(16)}`);
return ParseAction.IgnoreElement;
case 0x1f43b675: // cluster
return ParseAction.IgnoreElement;
default:
return ParseAction.ReadNext;
}
}

const audioTracks = matroska.segment.tracks;
if (audioTracks?.entries) {

audioTracks.entries.forEach(entry => {
const stream: ITrackInfo = {
codecName: entry.codecID.replace('A_', '').replace('V_', ''),
codecSettings: entry.codecSettings,
flagDefault: entry.flagDefault,
flagLacing: entry.flagLacing,
flagEnabled: entry.flagEnabled,
language: entry.language,
name: entry.name,
type: entry.trackType,
audio: entry.audio,
video: entry.video
};
this.metadata.addStreamInfo(stream);
});

const audioTrack = audioTracks.entries
.filter(entry => entry.trackType === TrackType.audio)
.reduce((acc: ITrackEntry | null, cur: ITrackEntry): ITrackEntry => {
if (!acc) return cur;
if (cur.flagDefault && !acc.flagDefault) return cur;
if (cur.trackNumber < acc.trackNumber) return cur;
return acc;
}, null);

if (audioTrack) {
this.metadata.setFormat('codec', audioTrack.codecID.replace('A_', ''));
this.metadata.setFormat('sampleRate', audioTrack.audio.samplingFrequency);
this.metadata.setFormat('numberOfChannels', audioTrack.audio.channels);
}

if (matroska.segment.tags) {
await Promise.all(matroska.segment.tags.tag.map(async tag => {
const target = tag.target;
const targetType = target?.targetTypeValue ? TargetType[target.targetTypeValue] : (target?.targetType ? target.targetType : 'track');
await Promise.all(tag.simpleTags.map(async simpleTag => {
const value = simpleTag.string ? simpleTag.string : simpleTag.binary;
await this.addTag(`${targetType}:${simpleTag.name}`, value);
},
elementValue: async (element, value) => {
debug(`Received: name=${element.name}, value=${value}`);
switch (element.id) {
case 0x4282: // docType
this.metadata.setFormat('container', `EBML/${value}`);
break;

case 0x1549a966: { // Info (Segment Information)
const info = value as ISegmentInformation;
const timecodeScale = info.timecodeScale ? info.timecodeScale : 1000000;
if (typeof info.duration === 'number') {
const duration = info.duration * timecodeScale / 1000000000;
await this.addTag('segment:title', info.title);
this.metadata.setFormat('duration', Number(duration));
}
}
break;

case 0x1654ae6b: { // tracks
const audioTracks = value as ITrackElement;
if (audioTracks?.entries) {
audioTracks.entries.forEach(entry => {
const stream: ITrackInfo = {
codecName: entry.codecID.replace('A_', '').replace('V_', ''),
codecSettings: entry.codecSettings,
flagDefault: entry.flagDefault,
flagLacing: entry.flagLacing,
flagEnabled: entry.flagEnabled,
language: entry.language,
name: entry.name,
type: entry.trackType,
audio: entry.audio,
video: entry.video
};
this.metadata.addStreamInfo(stream);
});

const audioTrack = audioTracks.entries
.filter(entry => entry.trackType === TrackType.audio)
.reduce((acc: ITrackEntry | null, cur: ITrackEntry): ITrackEntry => {
if (!acc) return cur;
if (cur.flagDefault && !acc.flagDefault) return cur;
if (cur.trackNumber < acc.trackNumber) return cur;
return acc;
}, null);

if (audioTrack) {
this.metadata.setFormat('codec', audioTrack.codecID.replace('A_', ''));
this.metadata.setFormat('sampleRate', audioTrack.audio.samplingFrequency);
this.metadata.setFormat('numberOfChannels', audioTrack.audio.channels);
}
}
}
break;

case 0x1254c367: { // tags
const tags = value as unknown as ITags;
await Promise.all(tags.tag.map(async tag => {
const target = tag.target;
const targetType = target?.targetTypeValue ? TargetType[target.targetTypeValue] : (target?.targetType ? target.targetType : 'track');
await Promise.all(tag.simpleTags.map(async simpleTag => {
const value = simpleTag.string ? simpleTag.string : simpleTag.binary;
await this.addTag(`${targetType}:${simpleTag.name}`, value);
}));
}));
}));
}

if (matroska.segment.attachments) {
await Promise.all(matroska.segment.attachments.attachedFiles
.filter(file => file.mimeType.startsWith('image/'))
.map(file => this.addTag('picture', {
data: file.data,
format: file.mimeType,
description: file.description,
name: file.name
})));
}
break;

case 0x1941a469: { // attachments
const attachments = value as unknown as IAttachments;
await Promise.all(attachments.attachedFiles
.filter(file => file.mimeType.startsWith('image/'))
.map(file => this.addTag('picture', {
data: file.data,
format: file.mimeType,
description: file.description,
name: file.name
})));

}
break;
}
}
}
});
}

private async addTag(tagId: string, value: AnyTagValue): Promise<void> {
Expand Down
Loading
Loading