microsoft · pelikhan · Apr 25, 2025 · Apr 24, 2025 · Apr 24, 2025 · Apr 25, 2025
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -14,6 +14,7 @@
         "arrayify",
         "astgrep",
         "astrojs",
+        "autocrop",
         "Automatable",
         "autopad",
         "azurecontentsafety",
@@ -22,6 +23,7 @@
         "azuretoken",
         "bitindex",
         "blockslist",
+        "bufferlike",
         "BYOG",
         "cancellers",
         "cctx",
@@ -45,6 +47,7 @@
         "createfile",
         "cringy",
         "cybercrime",
+        "dall",
         "DALLE",
         "dbgc",
         "dbgp",

diff --git a/docs/genaisrc/blog-image.genai.mts b/docs/genaisrc/blog-image.genai.mts
@@ -7,10 +7,10 @@
    // phase 1: generate image prompt
    const style =
        "iconic, 2D, 8-bit, corporate, 5-color, simple, geometric, no people, no text"
     const { text: imagePrompt } = await runPrompt(
         (_) => {
             _.def("BLOG_POST", MD.content(file.content))
-            _.$`Generate an image prompt for DALLE-3 that illustrates the contents of <BLOG_POST>.
+            _.$`Generate an image prompt for gpt-image-1 that illustrates the contents of <BLOG_POST>.
 Include specific description related to the content of <BLOG_POST>.        
     ${style}`
         },
@@ -24,10 +24,9 @@
     ${style}`,
         {
             mime: "image/png",
-            size: "1792x1024",
-            scale: 768 / 1792,
+            size: "landscape",
             maxHeight: 762,
-            style: "vivid",
+            model: "openai:gpt-image-1",
         }
     )
 

diff --git a/docs/public/blog/gpt-image-1.mp3 b/docs/public/blog/gpt-image-1.mp3
diff --git a/docs/public/blog/gpt-image-1.txt b/docs/public/blog/gpt-image-1.txt
@@ -0,0 +1 @@
+OpenAI just introduced gpt-image-1, the latest AI model designed to generate images from plain text prompts. This update is now available through both OpenAI’s own API and Azure AI Foundry, making it even easier for anyone to experiment with image generation. To test its abilities, the team generated pixel art cats with the same prompt across three models—DALL·E 2, DALL·E 3, and gpt-image-1—highlighting how each interprets the same creative task. The results show different visual styles and a unique flair from the new gpt-image-1, which brings fresh options for creators seeking distinctive, high-detail pixel art. With this launch, you can now compare outputs side by side, observing nuanced changes in how these models translate words into images—and discover which one fits your next idea best.
diff --git a/.../docs/blog/88daddda0cbe49a60fe7b11db44b2f037c0e70f8469884df13e0bbaff8bb66de.png b/.../docs/blog/88daddda0cbe49a60fe7b11db44b2f037c0e70f8469884df13e0bbaff8bb66de.png
diff --git a/.../docs/blog/8ce06ae2b0bd7193701d7914faf3faf9b384ae6d3d8cb1d29113b47900aad66a.png b/.../docs/blog/8ce06ae2b0bd7193701d7914faf3faf9b384ae6d3d8cb1d29113b47900aad66a.png
diff --git a/.../docs/blog/9c8d4a6bd2b023110b8e716ca48acae431401adf1c8d816c9b986abefa6acafe.png b/.../docs/blog/9c8d4a6bd2b023110b8e716ca48acae431401adf1c8d816c9b986abefa6acafe.png
diff --git a/docs/src/content/docs/blog/gpt-image-1.mdx b/docs/src/content/docs/blog/gpt-image-1.mdx
@@ -0,0 +1,83 @@
+---
+title: GPT-Image-1
+description: A new model for generating images from text prompts
+author: pelikhan
+date: 2025-04-25
+cover:
+  alt: Three side-by-side square frames, each showing a uniquely posed 8-bit style
+    pixel cat. Each frame visually represents image generation from different AI
+    models, using five flat corporate colors and minimalist geometric
+    backgrounds. The cats are simple, highly pixelated, and visually distinct
+    from one another, with no text or people present, creating a clean,
+    corporate, and comparative visual suitable for a blog.
+  image: ./gpt-image-1.png
+tags:
+  - openai gpt-image-1
+  - image generation
+  - dall-e comparison
+  - azure ai foundry
+  - pixel art cat
+excerpt: Our team just launched support for the new OpenAI gpt-image-1 image
+  generation model, now available through both OpenAI’s API and Azure AI
+  Foundry. We compared gpt-image-1 to DALL·E 2 and DALL·E 3 by generating 8-bit
+  pixel cat images using the same prompt. Each model produces distinct visual
+  results, and gpt-image-1 brings its own style and interpretation. This update
+  helps you evaluate how current generative models handle familiar creative
+  tasks while leveraging advances in image synthesis. Try running the same
+  workflows you use for existing models to see how output and prompt handling
+  differ with gpt-image-1.
+
+---
+import BlogNarration from "../../../components/BlogNarration.astro"
+
+<BlogNarration />
+
+We've added support for the new OpenAI `gpt-image-1` image generation model.
+You can try it out through OpenAI's API or Azure AI Foundry.
+
+```js 'model: "openai:gpt-image-1"'
+... = await generateImage("...", {
+    model: "openai:gpt-image-1",
+})
+```
+
+To compare the output of this model, here is a little script that
+generates a pixelated cat image with DALL·E 2, DALL·E 3, and `gpt-image-1`.
+
+```js title="images.genai.mjs" wrap
+const { output } = env
+for (const model of [
+    "openai:dall-e-2",
+    "openai:dall-e-3",
+    "openai:gpt-image-1",
+]) {
+    output.heading(3, `Model: ${model}`)
+    const { image, revisedPrompt } = await generateImage(
+        `a cute cat. only one. iconic, high details. 8-bit resolution.`,
+        {
+            maxWidth: 400,
+            mime: "image/png",
+            model,
+            size: "square",
+        }
+    )
+    await env.output.image(image.filename)
+    output.fence(revisedPrompt)
+}
+```
+
+### Model: openai:dall-e-2
+
+![image](./88daddda0cbe49a60fe7b11db44b2f037c0e70f8469884df13e0bbaff8bb66de.png)
+
+### Model: openai:dall-e-3
+
+![image](./8ce06ae2b0bd7193701d7914faf3faf9b384ae6d3d8cb1d29113b47900aad66a.png)
+
+```
+Visualize an adorable single feline, lavishly detailed, represented in charming 8-bit resolution. This cat is incredibly distinctive and recognizable, with unique features that make it stand out from the norm. Consider adding intricate patterns on its fur or any other unusual characteristics to boost the iconic nature of this cute cat.
+```
+
+### Model: openai:gpt-image-1
+
+![image](./9c8d4a6bd2b023110b8e716ca48acae431401adf1c8d816c9b986abefa6acafe.png)
diff --git a/docs/src/content/docs/blog/gpt-image-1.png b/docs/src/content/docs/blog/gpt-image-1.png
diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts
@@ -167,7 +167,7 @@ export type SpeechFunction = (
 export type CreateImageRequest = {
     model: string
     prompt: string
-    quality?: "hd"
+    quality?: string
     size?: string
     style?: string
 }

diff --git a/packages/core/src/image.ts b/packages/core/src/image.ts
@@ -1,6 +1,3 @@
-import debug from "debug"
-const dbg = debug("genaiscript:image")
-
 // Import necessary functions and types from other modules
 import { resolveBufferLike } from "./bufferlike"
 import {
@@ -16,7 +13,9 @@
 import { CancellationOptions, checkCancelled } from "./cancellation"
 import { wrapColor, wrapRgbColor } from "./consolecolor"
 import { assert } from "console"
+import { genaiscriptDebug } from "./debug"
+const dbg = genaiscriptDebug("image")
 
 async function prepare(
     url: BufferLike,
    options: ImageGenerationOptions &
@@ -75,15 +74,33 @@
 
     // Contain the image within specified max dimensions if provided
     if (options.maxWidth ?? options.maxHeight) {
-        dbg(
-            `containing image within ${options.maxWidth || ""}x${options.maxHeight || ""}`
-        )
-        contain(
-            img,
-            img.width > maxWidth ? maxWidth : img.width,
-            img.height > maxHeight ? maxHeight : img.height,
-            HorizontalAlign.CENTER | VerticalAlign.MIDDLE
-        )
+        if (options.maxWidth && !options.maxHeight) {
+            if (img.width > options.maxWidth) {
+                dbg(`resize width to %d`, options.maxWidth)
+                img.resize({
+                    w: options.maxWidth,
+                    h: Math.ceil((img.height / img.width) * options.maxWidth),
+                })
+            }
+        } else if (options.maxHeight && !options.maxWidth) {
+            if (img.height > options.maxHeight) {
+                dbg(`resize height to %d`, options.maxHeight)
+                img.resize({
+                    h: options.maxHeight,
+                    w: Math.ceil((img.width / img.height) * options.maxHeight),
+                })
+            }
+        } else {
+            dbg(
+                `containing image within ${options.maxWidth || ""}x${options.maxHeight || ""}`
+            )
+            contain(
+                img,
+                img.width > maxWidth ? maxWidth : img.width,
+                img.height > maxHeight ? maxHeight : img.height,
+                HorizontalAlign.CENTER | VerticalAlign.MIDDLE
+            )
+        }
     }
 
     // Auto-crop the image if required by options

diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts
@@ -16,7 +16,7 @@
    TOOL_NAME,
    TOOL_URL,
 } from "./constants"
 import { estimateTokens } from "./tokens"
 import {
    ChatCompletionHandler,
    CreateImageRequest,
@@ -762,15 +762,56 @@
     const { trace } = options || {}
     let url = `${cfg.base}/images/generations`
 
-    const body = {
+    const isDallE = /^dall-e/i.test(model)
+    const isDallE2 = /^dall-e-2/i.test(model)
+    const isDallE3 = /^dall-e-3/i.test(model)
+    const isGpt = /^gpt-image/i.test(model)
+
+    const body: any = {
         model,
         prompt,
         size,
         quality,
         style,
-        response_format: "b64_json",
         ...rest,
     }
+
+    // auto is the default quality, so always delete it
+    if (body.quality === "auto" || isDallE2) delete body.quality
+    if (isDallE3) {
+        if (body.quality === "high") body.quality = "hd"
+        else delete body.quality
+    }
+    if (isGpt && body.quality === "hd") body.quality = "high"
+    if (!isDallE3) delete body.style
+    if (isDallE) body.response_format = "b64_json"
+
+    if (isDallE3) {
+        if (body.size === "portrait") body.size = "1024x1792"
+        else if (body.size === "landscape") body.size = "1792x1024"
+        else if (body.size === "square") body.size = "1024x1024"
+    } else if (isDallE2) {
+        if (
+            body.size === "portrait" ||
+            body.size === "landscape" ||
+            body.size === "square"
+        )
+            body.size = "1024x1024"
+    } else if (isGpt) {
+        if (body.size === "portrait") body.size = "1024x1536"
+        else if (body.size === "landscape") body.size = "1536x1024"
+        else if (body.size === "square") body.size = "1024x1024"
+    }
+
+    if (body.size === "auto") delete body.size
+
+    dbg("%o", {
+        quality: body.quality,
+        style: body.style,
+        response_format: body.response_format,
+        size: body.size,
+    })
+
     if (cfg.type === "azure") {
         const version = cfg.version || AZURE_OPENAI_API_VERSION
         trace?.itemValue(`version`, version)
@@ -784,7 +825,9 @@
 
     const fetch = await createFetch(options)
     try {
-        logInfo(`generate image with ${cfg.provider}:${cfg.model}`)
+        logInfo(
+            `generate image with ${cfg.provider}:${cfg.model} (this may take a while)`
+        )
         const freq = {
             method: "POST",
             headers: {
@@ -797,13 +840,15 @@
         trace?.itemValue(`url`, `[${url}](${url})`)
         traceFetchPost(trace, url, freq.headers, body)
         const res = await fetch(url, freq as any)
+        dbg(`response: %d %s`, res.status, res.statusText)
         trace?.itemValue(`status`, `${res.status} ${res.statusText}`)
         if (!res.ok)
             return {
                 image: undefined,
                 error: (await res.json())?.error || res.statusText,
             }
         const j = await res.json()
+        dbg(`%O`, j)
         const revisedPrompt = j.data[0]?.revised_prompt
         if (revisedPrompt)
             trace?.details(`📷 revised prompt`, j.data[0].revised_prompt)

diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts
@@ -17,7 +17,7 @@

 interface Diagnostic {
    filename: string
    range: CharRange
    severity: DiagnosticSeverity
    message: string
    /**
@@ -307,7 +307,7 @@
 >
 
 type ModelImageGenerationType = OptionsOrString<
-    "openai:dall-e-2" | "openai:dall-e-3"
+    "openai:gpt-image-1" | "openai:dall-e-2" | "openai:dall-e-3"
 >
 
 type ModelProviderType = OptionsOrString<
@@ -4380,10 +4380,35 @@
 
 interface ImageGenerationOptions extends ImageTransformOptions {
     model?: OptionsOrString<ModelImageGenerationType>
-    quality?: "hd"
+    /**
+     * The quality of the image that will be generated.
+     * auto (default value) will automatically select the best quality for the given model.
+     * high, medium and low are supported for gpt-image-1.
+     * high is supported for dall-e-3.
+     * dall-e-2 ignores this flag
+     */
+    quality?: "auto" | "low" | "medium" | "high"
+    /**
+     * Image size.
+     * For gpt-image-1: 1024x1024, 1536x1024 (landscape), 1024x1536 (portrait), or auto (default value)
+     * For dall-e-2: 256x256, 512x512, or 1024x1024.
+     * For dall-e-3: 1024x1024, 1792x1024 (landscape), or 1024x1792 (portrait).
+     */
     size?: OptionsOrString<
-        "256x256" | "512x512" | "1024x1024" | "1024x1792" | "1792x1024"
+        | "auto"
+        | "landscape"
+        | "portrait"
+        | "square"
+        | "1536x1024"
+        | "1024x1536"
+        | "256x256"
+        | "512x512"
+        | "1024x1024"
+        | "1024x1792"
+        | "1792x1024"
     >
+    /**
+     * Only used for DALL-E 3
+     */
     style?: OptionsOrString<"vivid" | "natural">
 }
 

diff --git a/packages/sample/genaisrc/image-gen.genai.mjs b/packages/sample/genaisrc/image-gen.genai.mjs
@@ -1,6 +1,20 @@
-const { image, revisedPrompt } = await generateImage(
-    `a cute cat. only one. iconic, high details. 8-bit resolution.`,
-    { maxWidth: 400, mime: "image/png" }
-)
-env.output.image(image.filename)
-env.output.fence(revisedPrompt)
+const { output } = env
+for (const model of [
+    "openai:dall-e-2",
+    "openai:dall-e-3",
+    "openai:gpt-image-1",
+]) {
+    output.heading(3, `Model: ${model}`)
+    const { image, revisedPrompt } = await generateImage(
+        `a cute cat. only one. iconic, high details. 8-bit resolution.`,
+        {
+            maxWidth: 400,
+            autoCrop: true,
+            mime: "image/png",
+            model,
+            size: "square",
+        }
+    )
+    await env.output.image(image.filename)
+    output.fence(revisedPrompt)
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		OpenAI just introduced gpt-image-1, the latest AI model designed to generate images from plain text prompts. This update is now available through both OpenAI’s own API and Azure AI Foundry, making it even easier for anyone to experiment with image generation. To test its abilities, the team generated pixel art cats with the same prompt across three models—DALL·E 2, DALL·E 3, and gpt-image-1—highlighting how each interprets the same creative task. The results show different visual styles and a unique flair from the new gpt-image-1, which brings fresh options for creators seeking distinctive, high-detail pixel art. With this launch, you can now compare outputs side by side, observing nuanced changes in how these models translate words into images—and discover which one fits your next idea best.