Add SDTurbo pipeline #15

Draft · wants to merge 1 commit into main
11 changes: 11 additions & 0 deletions examples/react/src/App.tsx
@@ -55,6 +55,17 @@ const pipelines = [
hasImg2Img: false,
hasControlNet: false
},
{
name: 'SD Turbo (2.6GB)',
repo: 'cyrildiagne/sdturbo-onnx',
revision: 'main',
fp16: true,
width: 512,
height: 512,
steps: 1,
hasImg2Img: false,
hasControlNet: false,
},
// {
// name: 'LCM Dreamshaper FP32 (4.2GB)',
// repo: 'aislamov/lcm-dreamshaper-v7-onnx',
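Note: a minimal sketch of driving this new entry end to end, assuming the dispatch added in DiffusionPipeline.ts below routes this repo to the new pipeline; the prompt is illustrative, and the options mirror the config entry above:

import { DiffusionPipeline } from '@/pipelines/DiffusionPipeline'

const pipe = await DiffusionPipeline.fromPretrained('cyrildiagne/sdturbo-onnx')
const images = await pipe.run({
  prompt: 'a photo of an astronaut riding a horse',
  width: 512,
  height: 512,
  numInferenceSteps: 1, // SD Turbo is distilled for single-step sampling
})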
4 changes: 4 additions & 0 deletions src/pipelines/DiffusionPipeline.ts
@@ -21,6 +21,10 @@ export class DiffusionPipeline {
if (typeof index.controlnet !== 'undefined') {
return StableDiffusionControlNetPipeline.fromPretrained(modelRepoOrPath, options)
}
// Temporary heuristic to identify the SD Turbo model: its model_index.json declares EulerDiscreteScheduler
if (index.scheduler[1] === 'EulerDiscreteScheduler') {
return SDTurboPipeline.fromPretrained(modelRepoOrPath, options)
}
return StableDiffusionPipeline.fromPretrained(modelRepoOrPath, options)
case 'StableDiffusionXLPipeline':
case 'ORTStableDiffusionXLPipeline':
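For context, `index` here is the parsed model_index.json. A minimal sketch of the shape the heuristic assumes (scheduler entries follow diffusers' ['library', 'ClassName'] convention; other fields are omitted and the exact values depend on the export):

// Sketch of the relevant part of model_index.json, as parsed into `index` above
const index = {
  scheduler: ['diffusers', 'EulerDiscreteScheduler'], // index.scheduler[1] is the class name
}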
151 changes: 151 additions & 0 deletions src/pipelines/SDTurboPipeline.ts
@@ -0,0 +1,151 @@
import { Session } from '@/backends'
import { getModelJSON } from '@/hub'
import { GetModelFileOptions } from '@/hub/common'
import { PipelineBase } from '@/pipelines/PipelineBase'
import { EulerDiscreteScheduler } from '@/schedulers/EulerDiscreteScheduler'
import { SchedulerConfig } from '@/schedulers/SchedulerBase'
import { CLIPTokenizer } from '@/tokenizers/CLIPTokenizer'
import { randomNormalTensor } from '@/util/Tensor'
import { Tensor } from '@xenova/transformers'
import { PretrainedOptions, ProgressCallback, ProgressStatus, dispatchProgress, loadModel } from './common'

export interface SDTurboInput {
prompt: string
seed?: string
width?: number
height?: number
numInferenceSteps: number
sdV1?: boolean
progressCallback?: ProgressCallback
runVaeOnEachStep?: boolean
img2imgFlag?: boolean
inputImage?: Float32Array
strength?: number
}

export class SDTurboPipeline extends PipelineBase {
declare scheduler: EulerDiscreteScheduler

constructor (unet: Session, vaeDecoder: Session, vaeEncoder: Session, textEncoder: Session, tokenizer: CLIPTokenizer, scheduler: EulerDiscreteScheduler) {
super()
this.unet = unet
this.vaeDecoder = vaeDecoder
this.vaeEncoder = vaeEncoder
this.textEncoder = textEncoder
this.tokenizer = tokenizer
this.scheduler = scheduler
this.vaeScaleFactor = 8
}

static createScheduler (config: SchedulerConfig) {
return new EulerDiscreteScheduler(
{
prediction_type: 'epsilon',
...config,
},
)
}

static async fromPretrained (modelRepoOrPath: string, options?: PretrainedOptions) {
const opts: GetModelFileOptions = {
...options,
}

// Order matters: WASM memory cannot shrink once grown, so load the largest model first
const unet = await loadModel(
modelRepoOrPath,
'unet/model.onnx',
opts,
)
const textEncoder = await loadModel(modelRepoOrPath, 'text_encoder/model.onnx', opts)
const vaeEncoder = await loadModel(modelRepoOrPath, 'vae_encoder/model.onnx', opts)
const vaeDecoder = await loadModel(modelRepoOrPath, 'vae_decoder/model.onnx', opts)

const schedulerConfig = await getModelJSON(modelRepoOrPath, 'scheduler/scheduler_config.json', true, opts)
const scheduler = SDTurboPipeline.createScheduler(schedulerConfig)

const tokenizer = await CLIPTokenizer.from_pretrained(modelRepoOrPath, { ...opts, subdir: 'tokenizer' })
await dispatchProgress(opts.progressCallback, {
status: ProgressStatus.Ready,
})
return new SDTurboPipeline(unet, vaeDecoder, vaeEncoder, textEncoder, tokenizer, scheduler)
}

async run (input: SDTurboInput) {
const width = input.width || 512
const height = input.height || 512
const batchSize = 1
const seed = input.seed || ''
this.scheduler.setTimesteps(input.numInferenceSteps || 1)

await dispatchProgress(input.progressCallback, {
status: ProgressStatus.EncodingPrompt,
})

const promptEmbeds = await this.encodePrompt(input.prompt)

const latentShape = [batchSize, 4, height / this.vaeScaleFactor, width / this.vaeScaleFactor] // NCHW: height before width
let latents = randomNormalTensor(latentShape, undefined, undefined, 'float32', seed) // Normal latents used in Text-to-Image
const timesteps = this.scheduler.timesteps.data

latents = latents.mul(this.scheduler.initNoiseSigma)

let humanStep = 1
let cachedImages: Tensor[] | null = null

for (const step of timesteps) {
// SD v1.4 takes an int64 timestep input. Ideally we would read the input dtype from the model,
// but onnxruntime-node currently exposes only input names, not their types
const timestep = input.sdV1
? new Tensor(BigInt64Array.from([BigInt(step)]))
: new Tensor(new Float32Array([step]))
await dispatchProgress(input.progressCallback, {
status: ProgressStatus.RunningUnet,
unetTimestep: humanStep,
unetTotalSteps: timesteps.length,
})
const latentInput = this.scheduler.scaleInput(latents)

const noise = await this.unet.run(
{ sample: latentInput, timestep, encoder_hidden_states: promptEmbeds },
)

const noisePred = noise.out_sample

latents = this.scheduler.step(
noisePred,
step,
latents,
)

if (input.runVaeOnEachStep) {
await dispatchProgress(input.progressCallback, {
status: ProgressStatus.RunningVae,
unetTimestep: humanStep,
unetTotalSteps: timesteps.length,
})
cachedImages = await this.makeImages(latents)
}
humanStep++
}

await dispatchProgress(input.progressCallback, {
status: ProgressStatus.Done,
})

if (input.runVaeOnEachStep) {
return cachedImages!
}

return this.makeImages(latents)
}

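// Note: img2imgFlag, inputImage and strength are declared on SDTurboInput but not yet
// consumed by run(); encodeImage below is groundwork for a future img2img path.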
async encodeImage (inputImage: Float32Array, width: number, height: number) {
const encoded = await this.vaeEncoder.run(
{ sample: new Tensor('float32', inputImage, [1, 3, height, width]) },
)

const encodedImage = encoded.latent_sample
return encodedImage.mul(0.18215)
}
}
130 changes: 130 additions & 0 deletions src/schedulers/EulerDiscreteScheduler.ts
@@ -0,0 +1,130 @@
import { SchedulerBase, SchedulerConfig } from '@/schedulers/SchedulerBase'
import { cat, interp, linspace, randomNormalTensor, range } from '@/util/Tensor'
import { Tensor } from '@xenova/transformers'

/**
* Euler discrete scheduler
*/
export class EulerDiscreteScheduler extends SchedulerBase {
sigmas: Tensor
stepIndex: number = 0

constructor (
config: SchedulerConfig,
) {
super(config)
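// 'scaled_linear' beta schedule: linear in sqrt(beta) space, then squared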
this.betas = linspace(
config.beta_start ** 0.5,
config.beta_end ** 0.5,
config.num_train_timesteps,
).pow(2)

this.alphas = linspace(1, 1, config.num_train_timesteps).sub(this.betas)
this.alphasCumprod = this.alphas.cumprod()

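// sigma_t = sqrt((1 - alphaCumprod_t) / alphaCumprod_t), the noise scale implied by the cumulative alphas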
this.sigmas = linspace(1, 1, config.num_train_timesteps)
.sub(this.alphasCumprod)
.div(this.alphasCumprod)
.sqrt()
this.timesteps = linspace(
0,
config.num_train_timesteps - 1,
config.num_train_timesteps,
).reverse()

this.sigmas = cat([
this.sigmas.reverse(),
new Tensor(this.sigmas.type, [0], [1]),
])

this.config = config
}

setTimesteps (numInferenceSteps: number) {
this.numInferenceSteps = numInferenceSteps

const stepRatio = ~~(
this.config.num_train_timesteps / this.numInferenceSteps
)
this.timesteps = range(1, numInferenceSteps + 1)
.reverse()
.mul(stepRatio)
.round()
this.timesteps = this.timesteps.sub(1)

this.sigmas = linspace(1, 1, this.config.num_train_timesteps)
.sub(this.alphasCumprod)
.div(this.alphasCumprod)
.sqrt()
this.sigmas = interp(
this.timesteps,
range(0, this.sigmas.data.length),
this.sigmas,
)

this.sigmas = cat([this.sigmas, new Tensor(this.sigmas.type, [0], [1])])

this.stepIndex = 0
}

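// Scale the model input so the UNet sees roughly unit-variance latents: x / sqrt(sigma^2 + 1)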
scaleInput (input: Tensor) {
const sigma = this.sigmas.data[this.stepIndex]
const scale = (sigma ** 2 + 1) ** 0.5
return input.div(scale)
}

get initNoiseSigma () {
return Math.max(...this.sigmas.data)
}

step (
modelOutput: Tensor,
timestep: number,
sample: Tensor,
s_churn: number = 0.0,
s_tmin: number = 0.0,
s_tmax: number = Infinity,
s_noise: number = 1.0,
) {
if (this.numInferenceSteps === null) {
throw new Error(
"Number of inference steps is 'null', you need to run 'setTimesteps' after creating the scheduler",
)
}

const sigma = this.sigmas.data[this.stepIndex]

// Compute gamma (the stochastic churn amount), mirroring diffusers' EulerDiscreteScheduler.step
let gamma = 0.0
if (s_tmin <= sigma && sigma <= s_tmax) {
gamma = Math.min(
s_churn / (this.sigmas.data.length - 1),
Math.sqrt(2) - 1,
)
}

const noise = randomNormalTensor(modelOutput.dims)

const eps = noise.mul(s_noise)
const sigma_hat = sigma * (gamma + 1)

if (gamma > 0) {
sample = sample.add(eps.mul(Math.sqrt(sigma_hat ** 2 - sigma ** 2)))
}

// 1. Compute predicted original sample (x_0) from sigma-scaled predicted noise
// (assumes config.prediction_type === 'epsilon')
const denoised = sample.sub(modelOutput.mul(sigma_hat))

// 2. Convert to an ODE derivative
const derivative = sample.sub(denoised).div(sigma_hat)

const dt = this.sigmas.data[this.stepIndex + 1] - sigma_hat

const prevSample = sample.add(derivative.mul(dt))

this.stepIndex++

return prevSample
}
}
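Worked through for the single-step case (assuming the usual num_train_timesteps = 1000 from scheduler_config.json): setTimesteps(1) gives stepRatio = 1000, timesteps = [999] and sigmas = [sigma_999, 0]. In step(), the default s_churn = 0 makes gamma = 0 and sigma_hat = sigma, so dt = 0 - sigma and prevSample = sample + ((sample - denoised) / sigma) * (-sigma) = denoised: a single Euler step lands exactly on the model's denoised prediction, which is why one step suffices for SD Turbo.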
50 changes: 50 additions & 0 deletions src/util/Tensor.ts
@@ -227,6 +227,56 @@ Tensor.prototype.cos_ = function () {
return this
}

Tensor.prototype.sqrt = function () {
return this.clone().sqrt_()
}

Tensor.prototype.sqrt_ = function () {
for (let i = 0; i < this.data.length; ++i) {
this.data[i] = Math.sqrt(this.data[i])
}
return this
}

export function interp (
x: Tensor,
xp: Tensor,
fp: Tensor,
) {
if (xp.dims.length !== 1) {
throw new Error('xp must be 1 dimensional')
}
if (fp.dims.length !== 1) {
throw new Error('fp must be 1 dimensional')
}
if (xp.dims[0] !== fp.dims[0]) {
throw new Error('xp and fp must have the same length')
}
if (x.dims.length !== 1) {
throw new Error('x must be 1 dimensional')
}
const newDims = x.dims.slice()
// @ts-ignore
const newData = new x.data.constructor(newDims.reduce((a, b) => a * b))
const left = fp.data[0]
const right = fp.data[fp.data.length - 1]
for (let i = 0; i < newData.length; ++i) {
const index = xp.data.findIndex((v) => v > x.data[i])
if (index === -1) {
newData[i] = right
} else if (index === 0) {
newData[i] = left
} else {
const x1 = xp.data[index - 1]
const x2 = xp.data[index]
const y1 = fp.data[index - 1]
const y2 = fp.data[index]
newData[i] = ((x.data[i] - x1) * (y2 - y1)) / (x2 - x1) + y1
}
}
return new Tensor(x.type, newData, newDims)
}

Tensor.prototype.location = 'cpu'

export function range (start: number, end: number, step = 1, type = 'float32') {
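A quick usage sketch of the new interp helper, which mirrors numpy.interp (piecewise-linear interpolation, clamped to the endpoint values outside the sample range); the query values here are illustrative:

import { Tensor } from '@xenova/transformers'
import { interp } from '@/util/Tensor'

const x = new Tensor('float32', new Float32Array([0.5, 2.5]), [2]) // query points
const xp = new Tensor('float32', new Float32Array([0, 1, 2]), [3]) // sample x-coordinates
const fp = new Tensor('float32', new Float32Array([0, 10, 20]), [3]) // sample y-values
const y = interp(x, xp, fp) // -> [5, 20]: interpolated at 0.5, clamped at 2.5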