Skip to content

Commit

Permalink
Improve Whisper language detection performance
Browse files: browse the repository at this point in the history
  • Loading branch information
ae9is committed Dec 16, 2024
1 parent 7cd642c commit ecdd598
Showing 3 changed files with 40 additions and 2 deletions.
7 changes: 7 additions & 0 deletions src/generation/configuration_utils.js
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,13 @@ export class GenerationConfig {
*/
bad_words_ids = null;

/**
* List of token ids that are allowed to be generated.
* Leave this as `null` (the default) to apply no such restriction: the logits
* processor for this option is only created when the value is not `null`.
* NOTE(review): presumably the counterpart of `bad_words_ids`, i.e. tokens outside
* this list are suppressed — confirm against `OnlyGoodWordsLogitsProcessor`.
* @type {number[][]}
* @default null
*/
good_words_ids = null;

/**
* List of token ids that must be generated.
* If given a `number[][]`, this is treated as a simple list of words that must be included, the opposite to `bad_words_ids`.
13 changes: 13 additions & 0 deletions src/generation/stopping_criteria.js
Original file line number | Diff line number | Diff line change
@@ -154,3 +154,16 @@ export class InterruptableStoppingCriteria extends StoppingCriteria {
return new Array(input_ids.length).fill(this.interrupted);
}
}

/**
 * Stopping criteria that flags every sequence as finished each time it is
 * evaluated, which can be used to always stop generation after one pass.
 */
export class AlwaysStopCriteria extends StoppingCriteria {
    constructor() {
        super();
    }

    /**
     * Report "stop" for every sequence in the batch.
     *
     * @param {ArrayLike<any>} input_ids The current batch; only its `length` is read.
     * @param {any} scores Ignored by this criterion.
     * @returns {boolean[]} An array holding one `true` per entry of `input_ids`.
     */
    _call(input_ids, scores) {
        const batch_size = input_ids.length;
        const done = new Array(batch_size);
        return done.fill(true);
    }
}
22 changes: 20 additions & 2 deletions src/models.js
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,7 @@ import {
TopKLogitsWarper,
TopPLogitsWarper,
ClassifierFreeGuidanceLogitsProcessor,
OnlyGoodWordsLogitsProcessor,
} from './generation/logits_process.js';

import {
@@ -112,7 +113,7 @@ import {
import { RawImage } from './utils/image.js';

import { dynamic_time_warping, max, medianFilter } from './utils/maths.js';
import { EosTokenCriteria, MaxLengthCriteria, StoppingCriteriaList } from './generation/stopping_criteria.js';
import { AlwaysStopCriteria, EosTokenCriteria, MaxLengthCriteria, StoppingCriteriaList } from './generation/stopping_criteria.js';
import { LogitsSampler } from './generation/logits_sampler.js';
import { apis } from './env.js';

@@ -1212,6 +1213,10 @@ export class PreTrainedModel extends Callable {
processors.push(new NoBadWordsLogitsProcessor(generation_config.bad_words_ids, generation_config.eos_token_id));
}

if (generation_config.good_words_ids !== null) {
processors.push(new OnlyGoodWordsLogitsProcessor(generation_config.good_words_ids, generation_config.eos_token_id));
}

if (generation_config.min_length !== null && generation_config.eos_token_id !== null && generation_config.min_length > 0) {
processors.push(new MinLengthLogitsProcessor(generation_config.min_length, generation_config.eos_token_id));
}
@@ -3137,7 +3142,20 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
if (!all_lang_ids || all_lang_ids.length <= 0) {
throw new Error("Cannot detect language without language code to token ID map for model");
}
const output = await this.generate({ ...options, decoder_input_ids });
const stopping_criteria = new StoppingCriteriaList();
stopping_criteria.push(new AlwaysStopCriteria());
const good_words_ids = [all_lang_ids];
const output = await this.generate({
...options,
generation_config: {
...generation_config,
good_words_ids,
num_beams: 1,
do_sample: false,
},
stopping_criteria,
decoder_input_ids,
});
const sane = Array.from((/**@type {Tensor}**/(output)).data).flatMap(x => Number(x));
const lang_ids = sane.filter(x => Object.values(generation_config.lang_to_id).includes(x));
return lang_ids;

0 comments on commit ecdd598

Please sign in to comment.