From 2a21352cd2925784291952d43be1ee71f0317248 Mon Sep 17 00:00:00 2001 From: Borewit Date: Tue, 13 Aug 2024 11:43:43 +0200 Subject: [PATCH 1/2] Implement hack to speed up Matroska parsing Stop reading until we receive a container element --- lib/ebml/EbmlIterator.ts | 32 ++++++--- lib/matroska/MatroskaDtd.ts | 2 +- lib/matroska/MatroskaParser.ts | 121 +++++++++++++++++++-------------- test/test-file-matroska.ts | 11 +++ 4 files changed, 104 insertions(+), 62 deletions(-) diff --git a/lib/ebml/EbmlIterator.ts b/lib/ebml/EbmlIterator.ts index 921131839..8b1fa368d 100644 --- a/lib/ebml/EbmlIterator.ts +++ b/lib/ebml/EbmlIterator.ts @@ -8,11 +8,17 @@ import * as Token from 'token-types'; const debug = initDebug('music-metadata:parser:ebml'); -interface ILinkedElementType extends IElementType { +export interface ILinkedElementType extends IElementType { + id: number; parent: ILinkedElementType | undefined; readonly container?: { [id: number]: ILinkedElementType; }; } +/** + * @return true, to quit the parser + */ +export type ElementListener = (dtdElement: ILinkedElementType, value: ValueType) => Promise; + /** * Extensible Binary Meta Language (EBML) iterator * https://en.wikipedia.org/wiki/Extensible_Binary_Meta_Language @@ -28,6 +34,7 @@ export class EbmlIterator { private ebmlMaxIDLength = 4; private ebmlMaxSizeLength = 8; + private cancel = false; /** * @param {ITokenizer} tokenizer Input @@ -42,13 +49,14 @@ export class EbmlIterator { this.parserMap.set(DataType.float, e => this.readFloat(e)); } - public async iterate(dtdElement: IElementType, posDone: number): Promise { - return this.parseContainer(linkParents(dtdElement), posDone); + public async iterate(dtdElement: IElementType, posDone: number, listener: ElementListener): Promise { + this.cancel = false; + return this.parseContainer(linkParents(dtdElement), posDone, listener); } - private async parseContainer(dtdElement: ILinkedElementType, posDone: number): Promise { + private async parseContainer(dtdElement: ILinkedElementType, posDone: number, listener: ElementListener): Promise { const tree: ITree = {}; - while (this.tokenizer.position < posDone) { + while (this.tokenizer.position < posDone && !this.cancel) { let element: IHeader; try { element = await this.readElement(); @@ -71,7 +79,7 @@ export class EbmlIterator { } debug(`Reading element: name=${getElementPath(child)}{id=0x${element.id}, container=${!!child.container}}`); if (child.container) { - const res = await this.parseContainer(child, element.len >= 0 ? this.tokenizer.position + element.len : -1); + const res = await this.parseContainer(child, element.len >= 0 ? this.tokenizer.position + element.len : -1, listener); if (child.multiple) { if (!tree[child.name]) { tree[child.name] = []; @@ -80,10 +88,13 @@ export class EbmlIterator { } else { tree[child.name] = res; } + this.cancel = await listener(child, res); } else { const parser = this.parserMap.get(child.value as DataType); if (typeof parser === 'function') { - tree[child.name] = await parser(element); + const value = await parser(element); + tree[child.name] = value; + this.cancel = await listener(child, value); } } } @@ -191,8 +202,11 @@ function readUIntBeAsBigInt(buf: Uint8Array, len: number): bigint { function linkParents(element: IElementType): ILinkedElementType { if (element.container) { Object.keys(element.container) - .map(id => (element.container as { [id: string]: ILinkedElementType; })[id]) - .forEach(child => { + .map(id => { + const child = (element.container as { [id: string]: ILinkedElementType; })[id]; + child.id = Number.parseInt(id); + return child; + }).forEach(child => { child.parent = element as ILinkedElementType; linkParents(child); }); diff --git a/lib/matroska/MatroskaDtd.ts b/lib/matroska/MatroskaDtd.ts index 92c382cd9..e9048c99d 100644 --- a/lib/matroska/MatroskaDtd.ts +++ b/lib/matroska/MatroskaDtd.ts @@ -174,6 +174,7 @@ export const matroskaDtd: IElementType = { // Cueing Data 0x1c53bb6b: { name: 'cues', + ignore: true, container: { 0xbb: { name: 'cuePoint', @@ -289,7 +290,6 @@ export const matroskaDtd: IElementType = { } } } - } } } diff --git a/lib/matroska/MatroskaParser.ts b/lib/matroska/MatroskaParser.ts index e7a911cfa..8b304d3fa 100644 --- a/lib/matroska/MatroskaParser.ts +++ b/lib/matroska/MatroskaParser.ts @@ -4,7 +4,7 @@ import type { ITokenizer } from 'strtok3'; import type { INativeMetadataCollector } from '../common/MetadataCollector.js'; import { BasicParser } from '../common/BasicParser.js'; import { matroskaDtd } from './MatroskaDtd.js'; -import { type IMatroskaDoc, type ITrackEntry, TargetType, TrackType } from './types.js'; +import { IAttachments, type IMatroskaDoc, IMatroskaSegment, ISegmentInformation, ITags, ITrackElement, type ITrackEntry, TargetType, TrackType } from './types.js'; import type { AnyTagValue, IOptions, ITrackInfo } from '../type.js'; import type { ITokenParser } from '../ParserFactory.js'; @@ -38,57 +38,64 @@ export class MatroskaParser extends BasicParser { const matroskaIterator = new EbmlIterator(this.tokenizer); debug('Initializing DTD end MatroskaIterator'); - const matroska = await matroskaIterator.iterate(matroskaDtd, containerSize) as unknown as IMatroskaDoc; - - this.metadata.setFormat('container', `EBML/${matroska.ebml.docType}`); - if (matroska.segment) { - - const info = matroska.segment.info; - if (info) { - const timecodeScale = info.timecodeScale ? info.timecodeScale :1000000; - if (typeof info.duration === 'number') { - const duration = info.duration * timecodeScale / 1000000000; - await this.addTag('segment:title', info.title); - this.metadata.setFormat('duration', Number(duration)); + const matroska = await matroskaIterator.iterate(matroskaDtd, containerSize, async (element, value) => { + debug(`Received: name=${element.name}, value=${value}`); + switch (element.id) { + case 0x4282: // docType + this.metadata.setFormat('container', `EBML/${value}`); + break; + + case 0x1549a966: {// info + const info = value as ISegmentInformation; + const timecodeScale = info.timecodeScale ? info.timecodeScale : 1000000; + if (typeof info.duration === 'number') { + const duration = info.duration * timecodeScale / 1000000000; + await this.addTag('segment:title', info.title); + this.metadata.setFormat('duration', Number(duration)); + } } - } - - const audioTracks = matroska.segment.tracks; - if (audioTracks?.entries) { - - audioTracks.entries.forEach(entry => { - const stream: ITrackInfo = { - codecName: entry.codecID.replace('A_', '').replace('V_', ''), - codecSettings: entry.codecSettings, - flagDefault: entry.flagDefault, - flagLacing: entry.flagLacing, - flagEnabled: entry.flagEnabled, - language: entry.language, - name: entry.name, - type: entry.trackType, - audio: entry.audio, - video: entry.video - }; - this.metadata.addStreamInfo(stream); - }); - - const audioTrack = audioTracks.entries - .filter(entry => entry.trackType === TrackType.audio) - .reduce((acc: ITrackEntry | null, cur: ITrackEntry): ITrackEntry => { - if (!acc) return cur; - if (cur.flagDefault && !acc.flagDefault) return cur; - if (cur.trackNumber < acc.trackNumber) return cur; - return acc; - }, null); - - if (audioTrack) { - this.metadata.setFormat('codec', audioTrack.codecID.replace('A_', '')); - this.metadata.setFormat('sampleRate', audioTrack.audio.samplingFrequency); - this.metadata.setFormat('numberOfChannels', audioTrack.audio.channels); + break; + + case 0x1654ae6b: { // tracks + const audioTracks = value as ITrackElement; + if (audioTracks?.entries) { + audioTracks.entries.forEach(entry => { + const stream: ITrackInfo = { + codecName: entry.codecID.replace('A_', '').replace('V_', ''), + codecSettings: entry.codecSettings, + flagDefault: entry.flagDefault, + flagLacing: entry.flagLacing, + flagEnabled: entry.flagEnabled, + language: entry.language, + name: entry.name, + type: entry.trackType, + audio: entry.audio, + video: entry.video + }; + this.metadata.addStreamInfo(stream); + }); + + const audioTrack = audioTracks.entries + .filter(entry => entry.trackType === TrackType.audio) + .reduce((acc: ITrackEntry | null, cur: ITrackEntry): ITrackEntry => { + if (!acc) return cur; + if (cur.flagDefault && !acc.flagDefault) return cur; + if (cur.trackNumber < acc.trackNumber) return cur; + return acc; + }, null); + + if (audioTrack) { + this.metadata.setFormat('codec', audioTrack.codecID.replace('A_', '')); + this.metadata.setFormat('sampleRate', audioTrack.audio.samplingFrequency); + this.metadata.setFormat('numberOfChannels', audioTrack.audio.channels); + } + } } + break; - if (matroska.segment.tags) { - await Promise.all(matroska.segment.tags.tag.map(async tag => { + case 0x1254c367: { // 'tags' + const tags = value as unknown as ITags; + await Promise.all(tags.tag.map(async tag => { const target = tag.target; const targetType = target?.targetTypeValue ? TargetType[target.targetTypeValue] : (target?.targetType ? target.targetType : 'track'); await Promise.all(tag.simpleTags.map(async simpleTag => { @@ -97,9 +104,11 @@ export class MatroskaParser extends BasicParser { })); })); } + break; - if (matroska.segment.attachments) { - await Promise.all(matroska.segment.attachments.attachedFiles + case 0x1941a469: { //attachments + const attachments = value as unknown as IAttachments; + await Promise.all(attachments.attachedFiles .filter(file => file.mimeType.startsWith('image/')) .map(file => this.addTag('picture', { data: file.data, @@ -107,9 +116,17 @@ export class MatroskaParser extends BasicParser { description: file.description, name: file.name }))); + } + break; + + //case 'cluster': + case 0x18538067: // segment + debug(`Cancel EBML parser after element ${element.name}`); + return true; // Quite EBML parser, we got the metadata we need } - } + return false; + }) as unknown as IMatroskaDoc; } private async addTag(tagId: string, value: AnyTagValue): Promise { diff --git a/test/test-file-matroska.ts b/test/test-file-matroska.ts index 99cedb8cd..7c5e73d5f 100644 --- a/test/test-file-matroska.ts +++ b/test/test-file-matroska.ts @@ -183,4 +183,15 @@ describe('Matroska formats', () => { assert.isUndefined(format.duration, 'format.duration'); }); + it('parse: 1 GB', async () => { + + const mkvPath = 'C:\\Users\\Maarten Gerbrands\\Downloads\\lg-uhd-secret-garden.mkv'; + const metadata = await mm.parseFile(mkvPath); + assert.isDefined(metadata, 'determine file-type'); + assert.strictEqual(metadata.format.container, 'EBML/matroska', 'fileType.mime'); + assert.strictEqual(metadata.format.codec, 'AC3', 'format.codec'); + assert.strictEqual(metadata.format.sampleRate, 48000, 'metadata.format.sampleRate'); + assert.approximately(metadata.format.duration, 184.69, 0.01, 'metadata.format.duration'); + }); + }); From bf40c2c5fe35b32003c9a1399022b0a49b57b52d Mon Sep 17 00:00:00 2001 From: Borewit Date: Thu, 15 Aug 2024 15:20:33 +0200 Subject: [PATCH 2/2] Parse EBML (Matroska, webm) using async notification --- lib/ebml/EbmlIterator.ts | 96 +++++++++++------- lib/ebml/types.ts | 1 - lib/matroska/MatroskaDtd.ts | 12 +-- lib/matroska/MatroskaParser.ts | 176 +++++++++++++++++---------------- lib/matroska/types.ts | 14 +-- test/test-file-matroska.ts | 13 +-- 6 files changed, 168 insertions(+), 144 deletions(-) diff --git a/lib/ebml/EbmlIterator.ts b/lib/ebml/EbmlIterator.ts index 8b1fa368d..811279aea 100644 --- a/lib/ebml/EbmlIterator.ts +++ b/lib/ebml/EbmlIterator.ts @@ -14,10 +14,21 @@ export interface ILinkedElementType extends IElementType { readonly container?: { [id: number]: ILinkedElementType; }; } +export enum ParseAction { + ReadNext = 0, // Continue reading the next elements + IgnoreElement = 2, // Ignore (do not read) this element + SkipSiblings = 3, // Skip all remaining elements at the same level + TerminateParsing = 4, // Terminate the parsing process + SkipElement = 5 // Consider the element has read, assume position is at the next element +} + /** * @return true, to quit the parser */ -export type ElementListener = (dtdElement: ILinkedElementType, value: ValueType) => Promise; +export type IElementListener = { + startNext: (dtdElement: ILinkedElementType) => ParseAction, + elementValue: (dtdElement: ILinkedElementType, value: ValueType, offset: number) => Promise +} /** * Extensible Binary Meta Language (EBML) iterator @@ -34,7 +45,6 @@ export class EbmlIterator { private ebmlMaxIDLength = 4; private ebmlMaxSizeLength = 8; - private cancel = false; /** * @param {ITokenizer} tokenizer Input @@ -49,17 +59,17 @@ export class EbmlIterator { this.parserMap.set(DataType.float, e => this.readFloat(e)); } - public async iterate(dtdElement: IElementType, posDone: number, listener: ElementListener): Promise { - this.cancel = false; + public async iterate(dtdElement: IElementType, posDone: number, listener: IElementListener): Promise { return this.parseContainer(linkParents(dtdElement), posDone, listener); } - private async parseContainer(dtdElement: ILinkedElementType, posDone: number, listener: ElementListener): Promise { + private async parseContainer(dtdElement: ILinkedElementType, posDone: number, listener: IElementListener): Promise { const tree: ITree = {}; - while (this.tokenizer.position < posDone && !this.cancel) { + while (this.tokenizer.position < posDone) { let element: IHeader; + const elementPosition= this.tokenizer.position; try { - element = await this.readElement(); + element = await this.readElement(); } catch (error) { if (error instanceof EndOfStreamError) { break; @@ -68,35 +78,53 @@ export class EbmlIterator { } const child = (dtdElement.container as { [id: number]: ILinkedElementType; })[element.id]; if (child) { - if (child.ignore) { - debug(`Ignore element: name=${getElementPath(child)}, element.id=0x${element.id}, container=${!!child.container}`); - await this.tokenizer.ignore(element.len); - } else { - if (element.id === 0x1F43B675) { - // Hack to ignore remaining segment, when cluster element received - // await this.tokenizer.ignore(posDone - this.tokenizer.position); - // break; - } - debug(`Reading element: name=${getElementPath(child)}{id=0x${element.id}, container=${!!child.container}}`); - if (child.container) { - const res = await this.parseContainer(child, element.len >= 0 ? this.tokenizer.position + element.len : -1, listener); - if (child.multiple) { - if (!tree[child.name]) { - tree[child.name] = []; + const action = listener.startNext(child); + switch (action) { + case ParseAction.ReadNext: { + if (element.id === 0x1F43B675) { + // Hack to ignore remaining segment, when cluster element received + // await this.tokenizer.ignore(posDone - this.tokenizer.position); + // break; + } + debug(`Read element: name=${getElementPath(child)}{id=0x${element.id.toString(16)}, container=${!!child.container}} at position=${elementPosition}`); + if (child.container) { + const res = await this.parseContainer(child, element.len >= 0 ? this.tokenizer.position + element.len : -1, listener); + if (child.multiple) { + if (!tree[child.name]) { + tree[child.name] = []; + } + (tree[child.name] as ITree[]).push(res); + } else { + tree[child.name] = res; } - (tree[child.name] as ITree[]).push(res); + await listener.elementValue(child, res, elementPosition); } else { - tree[child.name] = res; - } - this.cancel = await listener(child, res); - } else { - const parser = this.parserMap.get(child.value as DataType); - if (typeof parser === 'function') { - const value = await parser(element); - tree[child.name] = value; - this.cancel = await listener(child, value); + const parser = this.parserMap.get(child.value as DataType); + if (typeof parser === 'function') { + const value = await parser(element); + tree[child.name] = value; + await listener.elementValue(child, value, elementPosition); + } } - } + } break; + + case ParseAction.SkipElement: + debug(`Go to next element: name=${getElementPath(child)}, element.id=0x${element.id}, container=${!!child.container} at position=${elementPosition}`); + break; + + case ParseAction.IgnoreElement: + debug(`Ignore element: name=${getElementPath(child)}, element.id=0x${element.id}, container=${!!child.container} at position=${elementPosition}`); + await this.tokenizer.ignore(element.len); + break; + + case ParseAction.SkipSiblings: + debug(`Ignore remaining container, at: name=${getElementPath(child)}, element.id=0x${element.id}, container=${!!child.container} at position=${elementPosition}`); + await this.tokenizer.ignore(posDone - this.tokenizer.position); + break; + + case ParseAction.TerminateParsing: + debug(`Terminate parsing at element: name=${getElementPath(child)}, element.id=0x${element.id}, container=${!!child.container} at position=${elementPosition}`); + return tree; } } else { switch (element.id) { @@ -105,7 +133,7 @@ export class EbmlIterator { await this.tokenizer.ignore(element.len); break; default: - debug(`parseEbml: parent=${getElementPath(dtdElement)}, unknown child: id=${element.id.toString(16)}`); + debug(`parseEbml: parent=${getElementPath(dtdElement)}, unknown child: id=${element.id.toString(16)} at position=${elementPosition}`); this.padding += element.len; await this.tokenizer.ignore(element.len); } diff --git a/lib/ebml/types.ts b/lib/ebml/types.ts index a97113f69..810fc26e6 100644 --- a/lib/ebml/types.ts +++ b/lib/ebml/types.ts @@ -24,7 +24,6 @@ export interface IElementType { readonly value?: DataType; readonly container?: { [id: number]: IElementType; }; readonly multiple?: boolean; - readonly ignore?: boolean; } export interface IEbmlDoc { diff --git a/lib/matroska/MatroskaDtd.ts b/lib/matroska/MatroskaDtd.ts index e9048c99d..3bd32625e 100644 --- a/lib/matroska/MatroskaDtd.ts +++ b/lib/matroska/MatroskaDtd.ts @@ -27,15 +27,16 @@ export const matroskaDtd: IElementType = { name: 'segment', container: { - // Meta Seek Information + // Meta Seek Information (also known as MetaSeek) 0x114d9b74: { name: 'seekHead', container: { 0x4dbb: { name: 'seek', + multiple: true, container: { - 0x53ab: {name: 'seekId', value: DataType.binary}, - 0x53ac: {name: 'seekPosition', value: DataType.uint} + 0x53ab: {name: 'id', value: DataType.binary}, + 0x53ac: {name: 'position', value: DataType.uint} } } } @@ -69,8 +70,8 @@ export const matroskaDtd: IElementType = { 0x58d7: {name: 'silentTracks ', multiple: true}, 0xa7: {name: 'position', value: DataType.uid}, 0xab: {name: 'prevSize', value: DataType.uid}, - 0xa0: {name: 'blockGroup', ignore: true}, - 0xa3: {name: 'simpleBlock', ignore: true} + 0xa0: {name: 'blockGroup'}, + 0xa3: {name: 'simpleBlock'} } }, @@ -174,7 +175,6 @@ export const matroskaDtd: IElementType = { // Cueing Data 0x1c53bb6b: { name: 'cues', - ignore: true, container: { 0xbb: { name: 'cuePoint', diff --git a/lib/matroska/MatroskaParser.ts b/lib/matroska/MatroskaParser.ts index 8b304d3fa..7a67e7274 100644 --- a/lib/matroska/MatroskaParser.ts +++ b/lib/matroska/MatroskaParser.ts @@ -4,11 +4,11 @@ import type { ITokenizer } from 'strtok3'; import type { INativeMetadataCollector } from '../common/MetadataCollector.js'; import { BasicParser } from '../common/BasicParser.js'; import { matroskaDtd } from './MatroskaDtd.js'; -import { IAttachments, type IMatroskaDoc, IMatroskaSegment, ISegmentInformation, ITags, ITrackElement, type ITrackEntry, TargetType, TrackType } from './types.js'; +import { type IAttachments, type ISegmentInformation, type ITags, type ITrackElement, type ITrackEntry, TargetType, TrackType } from './types.js'; import type { AnyTagValue, IOptions, ITrackInfo } from '../type.js'; import type { ITokenParser } from '../ParserFactory.js'; -import { EbmlIterator } from '../ebml/EbmlIterator.js'; +import { EbmlIterator, ParseAction } from '../ebml/EbmlIterator.js'; const debug = initDebug('music-metadata:parser:matroska'); @@ -38,95 +38,103 @@ export class MatroskaParser extends BasicParser { const matroskaIterator = new EbmlIterator(this.tokenizer); debug('Initializing DTD end MatroskaIterator'); - const matroska = await matroskaIterator.iterate(matroskaDtd, containerSize, async (element, value) => { - debug(`Received: name=${element.name}, value=${value}`); - switch (element.id) { - case 0x4282: // docType - this.metadata.setFormat('container', `EBML/${value}`); - break; - - case 0x1549a966: {// info - const info = value as ISegmentInformation; - const timecodeScale = info.timecodeScale ? info.timecodeScale : 1000000; - if (typeof info.duration === 'number') { - const duration = info.duration * timecodeScale / 1000000000; - await this.addTag('segment:title', info.title); - this.metadata.setFormat('duration', Number(duration)); - } + await matroskaIterator.iterate(matroskaDtd, containerSize, { + startNext: (element) => { + switch (element.id) { + // case 0x1f43b675: // cluster + case 0x1c53bb6b: // Cueing Data + debug(`Skip element: name=${element.name}, id=0x${element.id.toString(16)}`); + return ParseAction.IgnoreElement; + case 0x1f43b675: // cluster + return ParseAction.IgnoreElement; + default: + return ParseAction.ReadNext; } - break; - - case 0x1654ae6b: { // tracks - const audioTracks = value as ITrackElement; - if (audioTracks?.entries) { - audioTracks.entries.forEach(entry => { - const stream: ITrackInfo = { - codecName: entry.codecID.replace('A_', '').replace('V_', ''), - codecSettings: entry.codecSettings, - flagDefault: entry.flagDefault, - flagLacing: entry.flagLacing, - flagEnabled: entry.flagEnabled, - language: entry.language, - name: entry.name, - type: entry.trackType, - audio: entry.audio, - video: entry.video - }; - this.metadata.addStreamInfo(stream); - }); - - const audioTrack = audioTracks.entries - .filter(entry => entry.trackType === TrackType.audio) - .reduce((acc: ITrackEntry | null, cur: ITrackEntry): ITrackEntry => { - if (!acc) return cur; - if (cur.flagDefault && !acc.flagDefault) return cur; - if (cur.trackNumber < acc.trackNumber) return cur; - return acc; - }, null); - - if (audioTrack) { - this.metadata.setFormat('codec', audioTrack.codecID.replace('A_', '')); - this.metadata.setFormat('sampleRate', audioTrack.audio.samplingFrequency); - this.metadata.setFormat('numberOfChannels', audioTrack.audio.channels); + }, + elementValue: async (element, value) => { + debug(`Received: name=${element.name}, value=${value}`); + switch (element.id) { + case 0x4282: // docType + this.metadata.setFormat('container', `EBML/${value}`); + break; + + case 0x1549a966: { // Info (Segment Information) + const info = value as ISegmentInformation; + const timecodeScale = info.timecodeScale ? info.timecodeScale : 1000000; + if (typeof info.duration === 'number') { + const duration = info.duration * timecodeScale / 1000000000; + await this.addTag('segment:title', info.title); + this.metadata.setFormat('duration', Number(duration)); } } - } - break; - - case 0x1254c367: { // 'tags' - const tags = value as unknown as ITags; - await Promise.all(tags.tag.map(async tag => { - const target = tag.target; - const targetType = target?.targetTypeValue ? TargetType[target.targetTypeValue] : (target?.targetType ? target.targetType : 'track'); - await Promise.all(tag.simpleTags.map(async simpleTag => { - const value = simpleTag.string ? simpleTag.string : simpleTag.binary; - await this.addTag(`${targetType}:${simpleTag.name}`, value); + break; + + case 0x1654ae6b: { // tracks + const audioTracks = value as ITrackElement; + if (audioTracks?.entries) { + audioTracks.entries.forEach(entry => { + const stream: ITrackInfo = { + codecName: entry.codecID.replace('A_', '').replace('V_', ''), + codecSettings: entry.codecSettings, + flagDefault: entry.flagDefault, + flagLacing: entry.flagLacing, + flagEnabled: entry.flagEnabled, + language: entry.language, + name: entry.name, + type: entry.trackType, + audio: entry.audio, + video: entry.video + }; + this.metadata.addStreamInfo(stream); + }); + + const audioTrack = audioTracks.entries + .filter(entry => entry.trackType === TrackType.audio) + .reduce((acc: ITrackEntry | null, cur: ITrackEntry): ITrackEntry => { + if (!acc) return cur; + if (cur.flagDefault && !acc.flagDefault) return cur; + if (cur.trackNumber < acc.trackNumber) return cur; + return acc; + }, null); + + if (audioTrack) { + this.metadata.setFormat('codec', audioTrack.codecID.replace('A_', '')); + this.metadata.setFormat('sampleRate', audioTrack.audio.samplingFrequency); + this.metadata.setFormat('numberOfChannels', audioTrack.audio.channels); + } + } + } + break; + + case 0x1254c367: { // tags + const tags = value as unknown as ITags; + await Promise.all(tags.tag.map(async tag => { + const target = tag.target; + const targetType = target?.targetTypeValue ? TargetType[target.targetTypeValue] : (target?.targetType ? target.targetType : 'track'); + await Promise.all(tag.simpleTags.map(async simpleTag => { + const value = simpleTag.string ? simpleTag.string : simpleTag.binary; + await this.addTag(`${targetType}:${simpleTag.name}`, value); + })); })); - })); - } - break; - - case 0x1941a469: { //attachments - const attachments = value as unknown as IAttachments; - await Promise.all(attachments.attachedFiles - .filter(file => file.mimeType.startsWith('image/')) - .map(file => this.addTag('picture', { - data: file.data, - format: file.mimeType, - description: file.description, - name: file.name - }))); + } + break; + + case 0x1941a469: { // attachments + const attachments = value as unknown as IAttachments; + await Promise.all(attachments.attachedFiles + .filter(file => file.mimeType.startsWith('image/')) + .map(file => this.addTag('picture', { + data: file.data, + format: file.mimeType, + description: file.description, + name: file.name + }))); + } + break; } - break; - - //case 'cluster': - case 0x18538067: // segment - debug(`Cancel EBML parser after element ${element.name}`); - return true; // Quite EBML parser, we got the metadata we need } - return false; - }) as unknown as IMatroskaDoc; + }); } private async addTag(tagId: string, value: AnyTagValue): Promise { diff --git a/lib/matroska/types.ts b/lib/matroska/types.ts index c59ac39dc..19f32a17b 100644 --- a/lib/matroska/types.ts +++ b/lib/matroska/types.ts @@ -1,12 +1,12 @@ import type { IEbmlDoc } from '../ebml/types.js'; -export interface ISeekHead { - id?: Uint8Array; - position?: number; +export interface ISeek { + id: Uint8Array; + position: number; } -export interface IMetaSeekInformation { - seekHeads: ISeekHead[]; +export interface ISeekHead { + seek: ISeek[]; } export interface ISegmentInformation { @@ -147,8 +147,8 @@ export interface IAttachments { } export interface IMatroskaSegment { - metaSeekInfo?: IMetaSeekInformation; - seekHeads?: ISeekHead[] + metaSeekInfo?: ISeekHead; + seekHeads?: ISeek[] info?: ISegmentInformation; tracks?: ITrackElement; tags?: ITags; diff --git a/test/test-file-matroska.ts b/test/test-file-matroska.ts index 7c5e73d5f..e2dda7d40 100644 --- a/test/test-file-matroska.ts +++ b/test/test-file-matroska.ts @@ -102,7 +102,7 @@ describe('Matroska formats', () => { const filePath = path.join(matroskaSamplePath, 'My Baby Boy.webm'); - const {format, common} = await mm.parseFile(filePath, {duration: true}); + const {format, common} = await mm.parseFile(filePath); assert.strictEqual(format.container, 'EBML/webm', 'format.container'); assert.strictEqual(format.codec, 'OPUS', 'format.codec'); @@ -183,15 +183,4 @@ describe('Matroska formats', () => { assert.isUndefined(format.duration, 'format.duration'); }); - it('parse: 1 GB', async () => { - - const mkvPath = 'C:\\Users\\Maarten Gerbrands\\Downloads\\lg-uhd-secret-garden.mkv'; - const metadata = await mm.parseFile(mkvPath); - assert.isDefined(metadata, 'determine file-type'); - assert.strictEqual(metadata.format.container, 'EBML/matroska', 'fileType.mime'); - assert.strictEqual(metadata.format.codec, 'AC3', 'format.codec'); - assert.strictEqual(metadata.format.sampleRate, 48000, 'metadata.format.sampleRate'); - assert.approximately(metadata.format.duration, 184.69, 0.01, 'metadata.format.duration'); - }); - });