diff --git a/packages/api/package.json b/packages/api/package.json index 65578977d..0d9b1a5ce 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -22,6 +22,7 @@ "prepare:type-check": "tsc --pretty --noEmit", "prepare": "run-s compile-schemas && run-p \"prepare:**\"", "compile-schemas": "node -r esm src/schema/compile-schemas.js", + "pull-ai-schema": "node -r esm src/schema/pull-ai-schema.js", "dev-server": "run-s compile-schemas && node dist/cli.js", "redoc": "nodemon -w src/schema/schema.yaml -x npm run prepare:redoc", "siserver": "nodemon -w dist -x node -r esm dist/stream-info-service.js -e js,yaml", diff --git a/packages/api/src/schema/ai-api-schema.yaml b/packages/api/src/schema/ai-api-schema.yaml new file mode 100644 index 000000000..d650032dc --- /dev/null +++ b/packages/api/src/schema/ai-api-schema.yaml @@ -0,0 +1,866 @@ +openapi: 3.1.0 +paths: + /api/beta/generate/text-to-image: + post: + tags: + - generate + summary: Text To Image + description: Generate images from text prompts. 
+ operationId: genTextToImage + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/TextToImageParams" + required: true + responses: + "200": + description: Successful Response + content: + application/json: + schema: + $ref: "#/components/schemas/ImageResponse" + x-speakeasy-name-override: data + "400": + description: Bad Request + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - &ref_0 + $ref: "#/components/schemas/error" + "401": + description: Unauthorized + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_0 + "422": + description: Validation Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPValidationError" + - *ref_0 + "500": + description: Internal Server Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_0 + default: + description: Error + content: + application/json: + schema: *ref_0 + security: + - HTTPBearer: [] + x-speakeasy-name-override: textToImage + /api/beta/generate/image-to-image: + post: + tags: + - generate + summary: Image To Image + description: Apply image transformations to a provided image. 
+ operationId: genImageToImage + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/Body_genImageToImage" + required: true + responses: + "200": + description: Successful Response + content: + application/json: + schema: + $ref: "#/components/schemas/ImageResponse" + x-speakeasy-name-override: data + "400": + description: Bad Request + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - &ref_1 + $ref: "#/components/schemas/error" + "401": + description: Unauthorized + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_1 + "422": + description: Validation Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPValidationError" + - *ref_1 + "500": + description: Internal Server Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_1 + default: + description: Error + content: + application/json: + schema: *ref_1 + security: + - HTTPBearer: [] + x-speakeasy-name-override: imageToImage + /api/beta/generate/image-to-video: + post: + tags: + - generate + summary: Image To Video + description: Generate a video from a provided image. 
+ operationId: genImageToVideo + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/Body_genImageToVideo" + required: true + responses: + "200": + description: Successful Response + content: + application/json: + schema: + $ref: "#/components/schemas/VideoResponse" + x-speakeasy-name-override: data + "400": + description: Bad Request + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - &ref_2 + $ref: "#/components/schemas/error" + "401": + description: Unauthorized + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_2 + "422": + description: Validation Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPValidationError" + - *ref_2 + "500": + description: Internal Server Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_2 + default: + description: Error + content: + application/json: + schema: *ref_2 + security: + - HTTPBearer: [] + x-speakeasy-name-override: imageToVideo + /api/beta/generate/upscale: + post: + tags: + - generate + summary: Upscale + description: Upscale an image by increasing its resolution. 
+ operationId: genUpscale + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/Body_genUpscale" + required: true + responses: + "200": + description: Successful Response + content: + application/json: + schema: + $ref: "#/components/schemas/ImageResponse" + x-speakeasy-name-override: data + "400": + description: Bad Request + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - &ref_3 + $ref: "#/components/schemas/error" + "401": + description: Unauthorized + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_3 + "422": + description: Validation Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPValidationError" + - *ref_3 + "500": + description: Internal Server Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_3 + default: + description: Error + content: + application/json: + schema: *ref_3 + security: + - HTTPBearer: [] + x-speakeasy-name-override: upscale + /api/beta/generate/audio-to-text: + post: + tags: + - generate + summary: Audio To Text + description: Transcribe audio files to text. 
+ operationId: genAudioToText + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/Body_genAudioToText" + required: true + responses: + "200": + description: Successful Response + content: + application/json: + schema: + $ref: "#/components/schemas/TextResponse" + x-speakeasy-name-override: data + "400": + description: Bad Request + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - &ref_4 + $ref: "#/components/schemas/error" + "401": + description: Unauthorized + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_4 + "413": + description: Request Entity Too Large + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_4 + "422": + description: Validation Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPValidationError" + - *ref_4 + "500": + description: Internal Server Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_4 + default: + description: Error + content: + application/json: + schema: *ref_4 + security: + - HTTPBearer: [] + x-speakeasy-name-override: audioToText + /api/beta/generate/segment-anything-2: + post: + tags: + - generate + summary: Segment Anything 2 + description: Segment objects in an image. 
+ operationId: genSegmentAnything2 + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/Body_genSegmentAnything2" + required: true + responses: + "200": + description: Successful Response + content: + application/json: + schema: + $ref: "#/components/schemas/MasksResponse" + x-speakeasy-name-override: data + "400": + description: Bad Request + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - &ref_5 + $ref: "#/components/schemas/error" + "401": + description: Unauthorized + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_5 + "422": + description: Validation Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPValidationError" + - *ref_5 + "500": + description: Internal Server Error + content: + application/json: + schema: + oneOf: + - $ref: "#/components/schemas/HTTPError" + - *ref_5 + default: + description: Error + content: + application/json: + schema: *ref_5 + security: + - HTTPBearer: [] + x-speakeasy-name-override: segmentAnything2 +components: + schemas: + APIError: + properties: + msg: + type: string + title: Msg + description: The error message. + type: object + required: + - msg + title: APIError + description: API error response model. + Body_genAudioToText: + properties: + audio: + type: string + format: binary + title: Audio + description: Uploaded audio file to be transcribed. + model_id: + type: string + title: Model Id + description: Hugging Face model ID used for transcription. + default: "" + type: object + required: + - audio + - model_id + title: Body_genAudioToText + Body_genImageToImage: + properties: + prompt: + type: string + title: Prompt + description: Text prompt(s) to guide image generation. + image: + type: string + format: binary + title: Image + description: Uploaded image to modify with the pipeline. 
+ model_id: + type: string + title: Model Id + description: Hugging Face model ID used for image generation. + default: "" + strength: + type: number + title: Strength + description: + Degree of transformation applied to the reference image (0 to 1). + default: 0.8 + guidance_scale: + type: number + title: Guidance Scale + description: >- + Encourages model to generate images closely linked to the text + prompt (higher values may reduce image quality). + default: 7.5 + image_guidance_scale: + type: number + title: Image Guidance Scale + description: >- + Degree to which the generated image is pushed towards the initial + image. + default: 1.5 + negative_prompt: + type: string + title: Negative Prompt + description: >- + Text prompt(s) to guide what to exclude from image generation. + Ignored if guidance_scale < 1. + default: "" + safety_check: + type: boolean + title: Safety Check + description: >- + Perform a safety check to estimate if generated images could be + offensive or harmful. + default: true + seed: + type: integer + title: Seed + description: Seed for random number generation. + num_inference_steps: + type: integer + title: Num Inference Steps + description: >- + Number of denoising steps. More steps usually lead to higher quality + images but slower inference. Modulated by strength. + default: 100 + num_images_per_prompt: + type: integer + title: Num Images Per Prompt + description: Number of images to generate per prompt. + default: 1 + type: object + required: + - prompt + - image + - model_id + title: Body_genImageToImage + Body_genImageToVideo: + properties: + image: + type: string + format: binary + title: Image + description: Uploaded image to generate a video from. + model_id: + type: string + title: Model Id + description: Hugging Face model ID used for video generation. + default: "" + height: + type: integer + title: Height + description: The height in pixels of the generated video. 
+ default: 576 + width: + type: integer + title: Width + description: The width in pixels of the generated video. + default: 1024 + fps: + type: integer + title: Fps + description: The frames per second of the generated video. + default: 6 + motion_bucket_id: + type: integer + title: Motion Bucket Id + description: >- + Used for conditioning the amount of motion for the generation. The + higher the number the more motion will be in the video. + default: 127 + noise_aug_strength: + type: number + title: Noise Aug Strength + description: >- + Amount of noise added to the conditioning image. Higher values + reduce resemblance to the conditioning image and increase motion. + default: 0.02 + safety_check: + type: boolean + title: Safety Check + description: >- + Perform a safety check to estimate if generated images could be + offensive or harmful. + default: true + seed: + type: integer + title: Seed + description: Seed for random number generation. + num_inference_steps: + type: integer + title: Num Inference Steps + description: >- + Number of denoising steps. More steps usually lead to higher quality + images but slower inference. Modulated by strength. + default: 25 + type: object + required: + - image + - model_id + title: Body_genImageToVideo + Body_genSegmentAnything2: + properties: + image: + type: string + format: binary + title: Image + description: Image to segment. + model_id: + type: string + title: Model Id + description: Hugging Face model ID used for image generation. + default: "" + point_coords: + type: string + title: Point Coords + description: >- + Nx2 array of point prompts to the model, where each point is in + (X,Y) in pixels. + point_labels: + type: string + title: Point Labels + description: >- + Labels for the point prompts, where 1 indicates a foreground point + and 0 indicates a background point. + box: + type: string + title: Box + description: + "A length 4 array given as a box prompt to the model, in XYXY + format." 
+ mask_input: + type: string + title: Mask Input + description: >- + A low-resolution mask input to the model, typically from a previous + prediction iteration, with the form 1xHxW (H=W=256 for SAM). + multimask_output: + type: boolean + title: Multimask Output + description: >- + If true, the model will return three masks for ambiguous input + prompts, often producing better masks than a single prediction. + default: true + return_logits: + type: boolean + title: Return Logits + description: >- + If true, returns un-thresholded mask logits instead of a binary + mask. + default: true + normalize_coords: + type: boolean + title: Normalize Coords + description: >- + If true, the point coordinates will be normalized to the range + [0,1], with point_coords expected to be with respect to image + dimensions. + default: true + type: object + required: + - image + - model_id + title: Body_genSegmentAnything2 + Body_genUpscale: + properties: + prompt: + type: string + title: Prompt + description: Text prompt(s) to guide upscaled image generation. + image: + type: string + format: binary + title: Image + description: Uploaded image to modify with the pipeline. + model_id: + type: string + title: Model Id + description: Hugging Face model ID used for upscaled image generation. + default: "" + safety_check: + type: boolean + title: Safety Check + description: >- + Perform a safety check to estimate if generated images could be + offensive or harmful. + default: true + seed: + type: integer + title: Seed + description: Seed for random number generation. + num_inference_steps: + type: integer + title: Num Inference Steps + description: >- + Number of denoising steps. More steps usually lead to higher quality + images but slower inference. Modulated by strength. 
+ default: 75 + type: object + required: + - prompt + - image + - model_id + title: Body_genUpscale + HTTPError: + properties: + detail: + allOf: + - $ref: "#/components/schemas/APIError" + description: Detailed error information. + type: object + required: + - detail + title: HTTPError + description: HTTP error response model. + HTTPValidationError: + properties: + detail: + items: + $ref: "#/components/schemas/ValidationError" + type: array + title: Detail + type: object + title: HTTPValidationError + ImageResponse: + properties: + images: + items: + $ref: "#/components/schemas/Media" + type: array + title: Images + description: The generated images. + type: object + required: + - images + title: ImageResponse + description: Response model for image generation. + MasksResponse: + properties: + masks: + type: string + title: Masks + description: The generated masks. + scores: + type: string + title: Scores + description: The model's confidence scores for each generated mask. + logits: + type: string + title: Logits + description: + "The raw, unnormalized predictions (logits) for the masks." + type: object + required: + - masks + - scores + - logits + title: MasksResponse + description: Response model for object segmentation. + Media: + properties: + url: + type: string + title: Url + description: The URL where the media can be accessed. + seed: + type: integer + title: Seed + description: The seed used to generate the media. + nsfw: + type: boolean + title: Nsfw + description: Whether the media was flagged as NSFW. + type: object + required: + - url + - seed + - nsfw + title: Media + description: + A media object containing information about the generated media. + TextResponse: + properties: + text: + type: string + title: Text + description: The generated text. + chunks: + items: + $ref: "#/components/schemas/chunk" + type: array + title: Chunks + description: The generated text chunks. 
+ type: object + required: + - text + - chunks + title: TextResponse + description: Response model for text generation. + TextToImageParams: + properties: + model_id: + type: string + title: Model Id + description: Hugging Face model ID used for image generation. + default: SG161222/RealVisXL_V4.0_Lightning + prompt: + type: string + title: Prompt + description: >- + Text prompt(s) to guide image generation. Separate multiple prompts + with '|' if supported by the model. + height: + type: integer + title: Height + description: The height in pixels of the generated image. + default: 576 + width: + type: integer + title: Width + description: The width in pixels of the generated image. + default: 1024 + guidance_scale: + type: number + title: Guidance Scale + description: >- + Encourages model to generate images closely linked to the text + prompt (higher values may reduce image quality). + default: 7.5 + negative_prompt: + type: string + title: Negative Prompt + description: >- + Text prompt(s) to guide what to exclude from image generation. + Ignored if guidance_scale < 1. + default: "" + safety_check: + type: boolean + title: Safety Check + description: >- + Perform a safety check to estimate if generated images could be + offensive or harmful. + default: true + seed: + type: integer + title: Seed + description: Seed for random number generation. + num_inference_steps: + type: integer + title: Num Inference Steps + description: >- + Number of denoising steps. More steps usually lead to higher quality + images but slower inference. Modulated by strength. + default: 50 + num_images_per_prompt: + type: integer + title: Num Images Per Prompt + description: Number of images to generate per prompt. 
+ default: 1 + type: object + required: + - prompt + - model_id + title: TextToImageParams + ValidationError: + properties: + loc: + items: + anyOf: + - type: string + - type: integer + type: array + title: Location + msg: + type: string + title: Message + type: + type: string + title: Error Type + type: object + required: + - loc + - msg + - type + title: ValidationError + VideoResponse: + properties: + images: + items: + $ref: "#/components/schemas/Media" + type: array + title: Images + description: The generated images. + type: object + required: + - images + title: VideoResponse + description: Response model for image generation. + chunk: + properties: + timestamp: + items: {} + type: array + title: Timestamp + description: The timestamp of the chunk. + text: + type: string + title: Text + description: The text of the chunk. + type: object + required: + - timestamp + - text + title: chunk + description: A chunk of text with a timestamp. + securitySchemes: + HTTPBearer: + type: http + scheme: bearer diff --git a/packages/api/src/schema/api-schema.yaml b/packages/api/src/schema/api-schema.yaml index 12fafc8e0..1ef3d52d2 100644 --- a/packages/api/src/schema/api-schema.yaml +++ b/packages/api/src/schema/api-schema.yaml @@ -35,6 +35,7 @@ tags: description: Operations related to tasks api - name: generate description: Operations related to AI generate api +$ref: "./ai-api-schema.yaml" components: securitySchemes: apiKey: @@ -2860,241 +2861,6 @@ components: targetSegmentSizeSecs: $ref: >- #/components/schemas/new-asset-payload/properties/targetSegmentSizeSecs - # AI Generate payloads. Keep in mind that these use snake_case instead of camelCase since - # they implement the same interface as the AI Gateway Livepeer node. 
- audio-to-text-payload: - type: object - required: - - audio - properties: - audio: - type: string - format: binary - maxLength: 10485760 # 10MiB - model_id: - type: string - default: openai/whisper-large-v3 - enum: - - openai/whisper-large-v3 - text-to-image-payload: - type: object - required: - - prompt - additionalProperties: false - properties: - prompt: - type: string - model_id: - type: string - default: SG161222/RealVisXL_V4.0_Lightning - enum: - - SG161222/RealVisXL_V4.0_Lightning - - ByteDance/SDXL-Lightning - height: - type: integer - width: - type: integer - guidance_scale: - type: number - default: 7.5 - negative_prompt: - type: string - default: "" - safety_check: - type: boolean - default: true - seed: - type: integer - num_inference_steps: - type: integer - default: 50 - minimum: 1 - maximum: 200 - num_images_per_prompt: - type: integer - default: 1 - minimum: 1 - maximum: 20 - image-to-image-payload: - type: object - required: - - prompt - - image - additionalProperties: false - properties: - prompt: - type: string - image: - type: string - format: binary - maxLength: 10485760 # 10MiB - model_id: - type: string - default: timbrooks/instruct-pix2pix - enum: - - timbrooks/instruct-pix2pix - - ByteDance/SDXL-Lightning - - SG161222/RealVisXL_V4.0_Lightning - strength: - type: number - default: 0.8 - guidance_scale: - type: number - default: 7.5 - image_guidance_scale: - type: number - default: 1.5 - negative_prompt: - type: string - default: "" - safety_check: - type: boolean - default: true - seed: - type: integer - num_images_per_prompt: - type: integer - default: 1 - minimum: 1 - maximum: 20 - image-to-video-payload: - type: object - required: - - image - additionalProperties: false - properties: - image: - type: string - format: binary - maxLength: 10485760 # 10MiB - model_id: - type: string - default: stabilityai/stable-video-diffusion-img2vid-xt-1-1 - enum: - - stabilityai/stable-video-diffusion-img2vid-xt-1-1 - height: - type: integer - default: 
576 - width: - type: integer - default: 1024 - fps: - type: integer - default: 6 - motion_bucket_id: - type: integer - default: 127 - noise_aug_strength: - type: number - default: 0.02 - seed: - type: integer - safety_check: - type: boolean - default: true - upscale-payload: - type: object - required: - - prompt - - image - additionalProperties: false - properties: - prompt: - type: string - image: - type: string - format: binary - maxLength: 10485760 # 10MiB - model_id: - type: string - default: stabilityai/stable-diffusion-x4-upscaler - enum: - - stabilityai/stable-diffusion-x4-upscaler - safety_check: - type: boolean - default: true - seed: - type: integer - ai-text-response: - type: object - required: - - text - - chunks - properties: - text: - type: string - chunks: - type: array - items: - type: object - required: - - timestamp - - text - properties: - timestamp: - type: array - items: {} - text: - type: string - ai-image-response: - type: object - required: - - images - properties: - images: - type: array - items: - type: object - required: - - url - - seed - - nsfw - properties: - url: - type: string - title: Url - seed: - type: integer - title: Seed - nsfw: - type: boolean - title: Nsfw - ai-error: - oneOf: - - $ref: "#/components/schemas/error" - - type: object - required: - - detail - properties: - detail: - type: object - required: - - msg - properties: - msg: - type: string - - type: object - properties: - detail: - type: array - items: - type: object - required: - - loc - - msg - - type - properties: - msg: - type: string - type: - type: string - title: Error Type - loc: - type: array - items: - anyOf: - - type: string - - type: integer paths: /stream: post: @@ -5202,139 +4968,3 @@ paths: application/json: schema: $ref: "#/components/schemas/error" - # AI APIs section, imported from AI OpenAPI spec with some adjustments - "/beta/generate/audio-to-text": - post: - operationId: genAudioToText - summary: Audio To Text - x-speakeasy-name-override: 
audioToText - tags: - - generate - requestBody: - required: true - content: - multipart/form-data: - schema: - $ref: "#/components/schemas/audio-to-text-payload" - responses: - "200": - description: Successful response - content: - application/json: - schema: - $ref: "#/components/schemas/ai-text-response" - x-speakeasy-name-override: data - default: - description: Error - content: - application/json: - schema: - $ref: "#/components/schemas/ai-error" - "/beta/generate/text-to-image": - post: - operationId: genTextToImage - summary: Text To Image - x-speakeasy-name-override: textToImage - tags: - - generate - requestBody: - required: true - content: - application/json: - schema: - $ref: "#/components/schemas/text-to-image-payload" - responses: - "200": - description: Successful Response - content: - application/json: - schema: - $ref: "#/components/schemas/ai-image-response" - x-speakeasy-name-override: data - default: - description: Error - content: - application/json: - schema: - $ref: "#/components/schemas/ai-error" - "/beta/generate/image-to-image": - post: - operationId: genImageToImage - x-speakeasy-name-override: imageToImage - summary: Image To Image - tags: - - generate - requestBody: - required: true - content: - multipart/form-data: - schema: - $ref: "#/components/schemas/image-to-image-payload" - responses: - "200": - description: Successful Response - content: - application/json: - schema: - $ref: "#/components/schemas/ai-image-response" - x-speakeasy-name-override: data - default: - description: Error - content: - application/json: - schema: - $ref: "#/components/schemas/ai-error" - "/beta/generate/image-to-video": - post: - operationId: genImageToVideo - x-speakeasy-name-override: imageToVideo - summary: Image To Video - tags: - - generate - requestBody: - content: - multipart/form-data: - schema: - $ref: "#/components/schemas/image-to-video-payload" - required: true - responses: - "200": - description: Successful Response - content: - 
application/json: - schema: - $ref: "#/components/schemas/ai-image-response" - x-speakeasy-name-override: data - default: - description: Error - content: - application/json: - schema: - $ref: "#/components/schemas/ai-error" - "/beta/generate/upscale": - post: - operationId: genUpscale - x-speakeasy-name-override: upscale - summary: Upscale - tags: - - generate - requestBody: - content: - multipart/form-data: - schema: - $ref: "#/components/schemas/upscale-payload" - required: true - responses: - "200": - description: Successful Response - content: - application/json: - schema: - $ref: "#/components/schemas/ai-image-response" - x-speakeasy-name-override: data - default: - description: Error - content: - application/json: - schema: - $ref: "#/components/schemas/ai-error" diff --git a/packages/api/src/schema/pull-ai-schema.js b/packages/api/src/schema/pull-ai-schema.js new file mode 100644 index 000000000..02e340d75 --- /dev/null +++ b/packages/api/src/schema/pull-ai-schema.js @@ -0,0 +1,102 @@ +import fs from "fs-extra"; +import { safeLoad as parseYaml, safeDump as serializeYaml } from "js-yaml"; +import path from "path"; + +// This downloads the AI schema from the AI worker repo and saves in the local +// ai-api-schema.yaml file, referenced by our main api-schema.yaml file. 
// Default model for each AI pipeline, keyed by the kebab-case pipeline name.
// Written into the `model_id.default` of the matching request schema.
const defaultModels = {
  "text-to-image": "SG161222/RealVisXL_V4.0_Lightning",
  "image-to-image": "timbrooks/instruct-pix2pix",
  "image-to-video": "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
  upscale: "stabilityai/stable-diffusion-x4-upscaler",
  "audio-to-text": "openai/whisper-large-v3",
};
const schemaDir = path.resolve(__dirname, ".");
const aiSchemaUrl =
  "https://raw.githubusercontent.com/livepeer/ai-worker/refs/heads/main/runner/gateway.openapi.yaml";

/**
 * Writes `data` to `filePath`, skipping the write (and the log line) when the
 * file already contains exactly the same text.
 *
 * @param {string} filePath - Destination file.
 * @param {string} data - Full file contents to store.
 */
const write = (filePath, data) => {
  if (fs.existsSync(filePath) && fs.readFileSync(filePath, "utf8") === data) {
    return;
  }
  fs.writeFileSync(filePath, data, "utf8");
  console.log(`wrote ${filePath}`);
};

/**
 * Builds a new object from `obj` by passing every entry through `fn`.
 *
 * @param {Object} obj - Source object.
 * @param {(key: string, value: any) => [string, any]} fn - Returns the
 *   `[key, value]` pair to store for each source entry.
 * @returns {Object} Object assembled from the returned pairs.
 */
const mapObject = (obj, fn) =>
  Object.fromEntries(
    Object.entries(obj).map(([key, value]) => fn(key, value)),
  );

// Returns a *fresh* reference to the API's generic error schema. js-yaml emits
// YAML anchors/aliases (&ref_0 / *ref_0) when the same object instance appears
// more than once, so every use gets its own object to keep the output plain.
const apiErrorRef = () => ({ $ref: "#/components/schemas/error" });

// True for 2xx status codes. Non-numeric keys such as "default" parse to NaN
// and are therefore treated as error responses.
const isSuccessStatus = (statusCode) =>
  Math.floor(Number.parseInt(statusCode, 10) / 100) === 2;

// Wraps the JSON schema of an error response in a `oneOf` that also accepts
// the generic API error. Responses without a JSON body are left untouched.
const addApiErrorToResponse = (response) => {
  const jsonContent = response.content && response.content["application/json"];
  if (!jsonContent) {
    return response;
  }
  jsonContent.schema = jsonContent.schema
    ? { oneOf: [jsonContent.schema, apiErrorRef()] }
    : apiErrorRef();
  return response;
};

// Applies the local adjustments to one path item of the downloaded spec.
// Every path is assumed to expose a POST operation, as the original script did.
const patchPathItem = (pathItem) => {
  // Remove the security requirement. OpenAPI declares `security` on the
  // operation object, so deleting it from the path item alone has no effect;
  // the path-level delete is kept only for backward compatibility.
  delete pathItem.security;
  const operation = pathItem.post;
  delete operation.security;

  // add $ref: "#/components/schemas/error" as oneOf to all of the error responses
  operation.responses = mapObject(
    operation.responses,
    (statusCode, response) => [
      statusCode,
      isSuccessStatus(statusCode) ? response : addApiErrorToResponse(response),
    ],
  );
  // add $ref: "#/components/schemas/error" as the default response
  if (!operation.responses["default"]) {
    operation.responses["default"] = {
      description: "Error",
      content: { "application/json": { schema: apiErrorRef() } },
    };
  }
  return pathItem;
};

// Derives the kebab-case pipeline name from a request-schema key. The spec
// names request schemas either `<Pipeline>Params` (JSON bodies) or
// `Body_gen<Pipeline>` (multipart bodies); e.g. both `ImageToVideoParams` and
// `Body_genImageToVideo` map to `image-to-video`. Returns null for other keys.
const pipelineNameFromSchemaKey = (key) => {
  const paramsSuffix = "Params";
  const bodyPrefix = "Body_gen";
  let pascalName;
  if (key.endsWith(paramsSuffix)) {
    pascalName = key.slice(0, -paramsSuffix.length);
  } else if (key.startsWith(bodyPrefix)) {
    pascalName = key.slice(bodyPrefix.length);
  } else {
    return null;
  }
  return pascalName.replace(/([a-z])([A-Z])/g, "$1-$2").toLowerCase();
};

/**
 * Downloads the AI gateway OpenAPI spec, applies the local adjustments
 * (path prefix, security removal, error responses, default models) and saves
 * the result as `ai-api-schema.yaml` next to this script.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the download fails or the document is not an OpenAPI
 *   spec with `paths` and `components.schemas`.
 */
const downloadAiSchema = async () => {
  // download the file
  const response = await fetch(aiSchemaUrl);
  if (!response.ok) {
    // Without this check an HTTP error page would be parsed as YAML and fail
    // later with an unrelated TypeError.
    throw new Error(
      `failed to download AI schema from ${aiSchemaUrl}: ${response.status} ${response.statusText}`,
    );
  }
  const schema = parseYaml(await response.text());
  if (
    !schema ||
    typeof schema !== "object" ||
    !schema.paths ||
    !schema.components ||
    !schema.components.schemas
  ) {
    throw new Error(
      `unexpected AI schema contents downloaded from ${aiSchemaUrl}`,
    );
  }

  // remove info and servers fields
  delete schema.info;
  delete schema.servers;

  // patches to the paths section
  // NOTE(review): the hand-written paths this file replaces used
  // `/beta/generate/...` without the `/api` prefix — confirm the prefix below
  // is the intended one.
  schema.paths = mapObject(schema.paths, (route, pathItem) => [
    `/api/beta/generate${route}`,
    patchPathItem(pathItem),
  ]);

  // Add default model_id to the request schemas of known pipelines
  schema.components.schemas = mapObject(
    schema.components.schemas,
    (key, value) => {
      const pipelineName = pipelineNameFromSchemaKey(key);
      const modelId = value.properties && value.properties.model_id;
      const hasDefault =
        pipelineName !== null &&
        Object.prototype.hasOwnProperty.call(defaultModels, pipelineName);
      if (hasDefault && modelId) {
        modelId.default = defaultModels[pipelineName];
      }
      return [key, value];
    },
  );

  const yaml = serializeYaml(schema);
  write(path.resolve(schemaDir, "ai-api-schema.yaml"), yaml);
};

downloadAiSchema().catch((err) => {
  console.error(err);
  process.exit(1);
});