transcribeSync(@HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion, @HeaderParam("content-type") String contentType,
+ @HeaderParam("Accept") String accept, @BodyParam("multipart/form-data") BinaryData body,
+ RequestOptions requestOptions, Context context);
+ }
+
+ /**
+ * Transcribes the provided audio stream.
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * durationMilliseconds: int (Required)
+ * combinedPhrases (Required): [
+ * (Required){
+ * channel: Integer (Optional)
+ * text: String (Required)
+ * }
+ * ]
+ * phrases (Required): [
+ * (Required){
+ * channel: Integer (Optional)
+ * speaker: Integer (Optional)
+ * offsetMilliseconds: int (Required)
+ * durationMilliseconds: int (Required)
+ * text: String (Required)
+ * words (Optional): [
+ * (Optional){
+ * text: String (Required)
+ * offsetMilliseconds: int (Required)
+ * durationMilliseconds: int (Required)
+ * }
+ * ]
+ * locale: String (Optional)
+ * confidence: double (Required)
+ * }
+ * ]
+ * }
+ * }</pre>
+ *
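+ * <p>A minimal invocation sketch (the multipart body is assumed to be built by the caller):</p>
+ * <pre>{@code
+ * client.transcribeWithResponseAsync(multipartBody, new RequestOptions())
+ *     .map(response -> response.getValue().toObject(TranscriptionResult.class))
+ *     .subscribe(result -> System.out.println(result.getDuration()));
+ * }</pre>
+ *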
+ * @param body The body of the multipart request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return the result of the transcribe operation along with {@link Response} on successful completion of
+ * {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> transcribeWithResponseAsync(BinaryData body, RequestOptions requestOptions) {
+ final String contentType = "multipart/form-data";
+ final String accept = "application/json";
+ return FluxUtil.withContext(context -> service.transcribe(this.getEndpoint(),
+ this.getServiceVersion().getVersion(), contentType, accept, body, requestOptions, context));
+ }
+
+ /**
+ * Transcribes the provided audio stream.
+ * <p><strong>Response Body Schema</strong></p>
+ *
+ * <pre>{@code
+ * {
+ * durationMilliseconds: int (Required)
+ * combinedPhrases (Required): [
+ * (Required){
+ * channel: Integer (Optional)
+ * text: String (Required)
+ * }
+ * ]
+ * phrases (Required): [
+ * (Required){
+ * channel: Integer (Optional)
+ * speaker: Integer (Optional)
+ * offsetMilliseconds: int (Required)
+ * durationMilliseconds: int (Required)
+ * text: String (Required)
+ * words (Optional): [
+ * (Optional){
+ * text: String (Required)
+ * offsetMilliseconds: int (Required)
+ * durationMilliseconds: int (Required)
+ * }
+ * ]
+ * locale: String (Optional)
+ * confidence: double (Required)
+ * }
+ * ]
+ * }
+ * }</pre>
+ *
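+ * <p>A minimal invocation sketch (the multipart body is assumed to be built by the caller):</p>
+ * <pre>{@code
+ * Response<BinaryData> response = client.transcribeWithResponse(multipartBody, new RequestOptions());
+ * TranscriptionResult result = response.getValue().toObject(TranscriptionResult.class);
+ * }</pre>
+ *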
+ * @param body The body of the multipart request.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return the result of the transcribe operation along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> transcribeWithResponse(BinaryData body, RequestOptions requestOptions) {
+ final String contentType = "multipart/form-data";
+ final String accept = "application/json";
+ return service.transcribeSync(this.getEndpoint(), this.getServiceVersion().getVersion(), contentType, accept,
+ body, requestOptions, Context.NONE);
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/implementation/package-info.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/implementation/package-info.java
new file mode 100644
index 000000000000..140636afd3d6
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/implementation/package-info.java
@@ -0,0 +1,9 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+/**
+ *
+ * Package containing the implementations for Transcription.
+ *
+ */
+package com.azure.ai.speech.transcription.implementation;
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/AudioFileDetails.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/AudioFileDetails.java
new file mode 100644
index 000000000000..7381bc25899d
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/AudioFileDetails.java
@@ -0,0 +1,119 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.BinaryData;
+
+/**
+ * The file details for the "audio" field.
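+ *
+ * <p>A minimal construction sketch (the file path is hypothetical):</p>
+ * <pre>{@code
+ * AudioFileDetails audio = new AudioFileDetails(BinaryData.fromFile(Paths.get("speech.wav")))
+ *     .setContentType("audio/wav");
+ * }</pre>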
+ */
+@Fluent
+public final class AudioFileDetails {
+
+ /*
+ * The content of the file.
+ */
+ @Generated
+ private final BinaryData content;
+
+ /*
+ * The filename of the file.
+ */
+ @Generated
+ private String filename;
+
+ /*
+ * The content-type of the file.
+ */
+ @Generated
+ private String contentType = "application/octet-stream";
+
+ /**
+ * Creates an instance of AudioFileDetails class.
+ *
+ * @param content the content value to set.
+ */
+ @Generated
+ public AudioFileDetails(BinaryData content) {
+ this.content = content;
+ }
+
+ /**
+ * Get the content property: The content of the file.
+ *
+ * @return the content value.
+ */
+ @Generated
+ public BinaryData getContent() {
+ return this.content;
+ }
+
+ /**
+ * Get the filename property: The filename of the file. If not explicitly set, a filename will be auto-generated
+ * from the contentType.
+ *
+ * @return the filename value, or an auto-generated filename if not set.
+ */
+ @Generated
+ public String getFilename() {
+ if (this.filename != null && !this.filename.isEmpty()) {
+ return this.filename;
+ }
+ if ("audio/wav".equalsIgnoreCase(this.contentType)) {
+ return "audio.wav";
+ }
+ if ("audio/mpeg".equalsIgnoreCase(this.contentType) || "audio/mp3".equalsIgnoreCase(this.contentType)) {
+ return "audio.mp3";
+ }
+ if ("audio/ogg".equalsIgnoreCase(this.contentType)) {
+ return "audio.ogg";
+ }
+ if ("audio/flac".equalsIgnoreCase(this.contentType)) {
+ return "audio.flac";
+ }
+ if ("audio/webm".equalsIgnoreCase(this.contentType)) {
+ return "audio.webm";
+ }
+ if ("audio/opus".equalsIgnoreCase(this.contentType)) {
+ return "audio.opus";
+ }
+ return "audio";
+ }
+
+ /**
+ * Set the filename property: The filename of the file.
+ *
+ * @param filename the filename value to set.
+ * @return the AudioFileDetails object itself.
+ */
+ @Generated
+ public AudioFileDetails setFilename(String filename) {
+ this.filename = filename;
+ return this;
+ }
+
+ /**
+ * Get the contentType property: The content-type of the file.
+ *
+ * @return the contentType value.
+ */
+ @Generated
+ public String getContentType() {
+ return this.contentType;
+ }
+
+ /**
+ * Set the contentType property: The content-type of the file.
+ *
+ * @param contentType the contentType value to set.
+ * @return the AudioFileDetails object itself.
+ */
+ @Generated
+ public AudioFileDetails setContentType(String contentType) {
+ this.contentType = contentType;
+ return this;
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/ChannelCombinedPhrases.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/ChannelCombinedPhrases.java
new file mode 100644
index 000000000000..a315b684bc4c
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/ChannelCombinedPhrases.java
@@ -0,0 +1,104 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * The full transcript per channel.
+ */
+@Immutable
+ public final class ChannelCombinedPhrases implements JsonSerializable<ChannelCombinedPhrases> {
+
+ /*
+ * The 0-based channel index. Only present if channel separation is enabled.
+ */
+ @Generated
+ private Integer channel;
+
+ /*
+ * The complete transcribed text for the channel.
+ */
+ @Generated
+ private final String text;
+
+ /**
+ * Creates an instance of ChannelCombinedPhrases class.
+ *
+ * @param text the text value to set.
+ */
+ @Generated
+ private ChannelCombinedPhrases(String text) {
+ this.text = text;
+ }
+
+ /**
+ * Get the channel property: The 0-based channel index. Only present if channel separation is enabled.
+ *
+ * @return the channel value.
+ */
+ @Generated
+ public Integer getChannel() {
+ return this.channel;
+ }
+
+ /**
+ * Get the text property: The complete transcribed text for the channel.
+ *
+ * @return the text value.
+ */
+ @Generated
+ public String getText() {
+ return this.text;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("text", this.text);
+ jsonWriter.writeNumberField("channel", this.channel);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChannelCombinedPhrases from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChannelCombinedPhrases if the JsonReader was pointing to an instance of it, or null if it
+ * was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChannelCombinedPhrases.
+ */
+ @Generated
+ public static ChannelCombinedPhrases fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String text = null;
+ Integer channel = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("text".equals(fieldName)) {
+ text = reader.getString();
+ } else if ("channel".equals(fieldName)) {
+ channel = reader.getNullable(JsonReader::getInt);
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChannelCombinedPhrases deserializedChannelCombinedPhrases = new ChannelCombinedPhrases(text);
+ deserializedChannelCombinedPhrases.channel = channel;
+ return deserializedChannelCombinedPhrases;
+ });
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/EnhancedModeOptions.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/EnhancedModeOptions.java
new file mode 100644
index 000000000000..d013f508bb8e
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/EnhancedModeOptions.java
@@ -0,0 +1,163 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Enhanced mode properties for transcription.
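+ *
+ * <p>A minimal configuration sketch (the task and prompt values are illustrative):</p>
+ * <pre>{@code
+ * EnhancedModeOptions enhanced = new EnhancedModeOptions()
+ *     .setTask("translate")
+ *     .setTargetLanguage("en")
+ *     .setPrompts(Arrays.asList("Prefer formal punctuation."));
+ * }</pre>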
+ */
+@Fluent
+ public final class EnhancedModeOptions implements JsonSerializable<EnhancedModeOptions> {
+
+ /*
+ * Enable enhanced mode for transcription. This is automatically set to true when task, targetLanguage, or prompt
+ * are specified.
+ */
+ @Generated
+ private Boolean enabled;
+
+ /*
+ * Task type for enhanced mode.
+ */
+ @Generated
+ private String task;
+
+ /*
+ * Target language for enhanced mode.
+ */
+ @Generated
+ private String targetLanguage;
+
+ /*
+ * A list of user prompts.
+ */
+ @Generated
+ private List<String> prompts;
+
+ /**
+ * Creates an instance of EnhancedModeOptions class with enabled set to true.
+ */
+ public EnhancedModeOptions() {
+ this.enabled = true;
+ }
+
+ /**
+ * Get the task property: Task type for enhanced mode.
+ *
+ * @return the task value.
+ */
+ @Generated
+ public String getTask() {
+ return this.task;
+ }
+
+ /**
+ * Set the task property: Task type for enhanced mode.
+ *
+ * @param task the task value to set.
+ * @return the EnhancedModeOptions object itself.
+ */
+ @Generated
+ public EnhancedModeOptions setTask(String task) {
+ this.task = task;
+ return this;
+ }
+
+ /**
+ * Get the targetLanguage property: Target language for enhanced mode.
+ *
+ * @return the targetLanguage value.
+ */
+ @Generated
+ public String getTargetLanguage() {
+ return this.targetLanguage;
+ }
+
+ /**
+ * Set the targetLanguage property: Target language for enhanced mode.
+ *
+ * @param targetLanguage the targetLanguage value to set.
+ * @return the EnhancedModeOptions object itself.
+ */
+ @Generated
+ public EnhancedModeOptions setTargetLanguage(String targetLanguage) {
+ this.targetLanguage = targetLanguage;
+ return this;
+ }
+
+ /**
+ * Get the prompts property: A list of user prompts.
+ *
+ * @return the prompts value.
+ */
+ @Generated
+ public List<String> getPrompts() {
+ return this.prompts;
+ }
+
+ /**
+ * Set the prompts property: A list of user prompts.
+ *
+ * @param prompts the prompts value to set.
+ * @return the EnhancedModeOptions object itself.
+ */
+ @Generated
+ public EnhancedModeOptions setPrompts(List<String> prompts) {
+ this.prompts = prompts;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("task", this.task);
+ jsonWriter.writeStringField("targetLanguage", this.targetLanguage);
+ jsonWriter.writeArrayField("prompt", this.prompts, (writer, element) -> writer.writeString(element));
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of EnhancedModeOptions from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of EnhancedModeOptions if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IOException If an error occurs while reading the EnhancedModeOptions.
+ */
+ @Generated
+ public static EnhancedModeOptions fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ EnhancedModeOptions deserializedEnhancedModeOptions = new EnhancedModeOptions();
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("enabled".equals(fieldName)) {
+ deserializedEnhancedModeOptions.enabled = reader.getNullable(JsonReader::getBoolean);
+ } else if ("task".equals(fieldName)) {
+ deserializedEnhancedModeOptions.task = reader.getString();
+ } else if ("targetLanguage".equals(fieldName)) {
+ deserializedEnhancedModeOptions.targetLanguage = reader.getString();
+ } else if ("prompt".equals(fieldName)) {
+ List<String> prompts = reader.readArray(reader1 -> reader1.getString());
+ deserializedEnhancedModeOptions.prompts = prompts;
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedEnhancedModeOptions;
+ });
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/PhraseListOptions.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/PhraseListOptions.java
new file mode 100644
index 000000000000..e269745eaf87
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/PhraseListOptions.java
@@ -0,0 +1,123 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Phrase list properties for transcription.
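+ *
+ * <p>A minimal configuration sketch (the phrases and weight are illustrative):</p>
+ * <pre>{@code
+ * PhraseListOptions phraseList = new PhraseListOptions()
+ *     .setPhrases(Arrays.asList("Contoso", "Bellevue"))
+ *     .setBiasingWeight(5.0);
+ * }</pre>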
+ */
+@Fluent
+ public final class PhraseListOptions implements JsonSerializable<PhraseListOptions> {
+
+ /*
+ * List of phrases for recognition.
+ */
+ @Generated
+ private List<String> phrases;
+
+ /*
+ * Biasing weight for phrase list (1.0 to 20.0).
+ */
+ @Generated
+ private Double biasingWeight;
+
+ /**
+ * Creates an instance of PhraseListOptions class.
+ */
+ @Generated
+ public PhraseListOptions() {
+ }
+
+ /**
+ * Get the phrases property: List of phrases for recognition.
+ *
+ * @return the phrases value.
+ */
+ @Generated
+ public List<String> getPhrases() {
+ return this.phrases;
+ }
+
+ /**
+ * Set the phrases property: List of phrases for recognition.
+ *
+ * @param phrases the phrases value to set.
+ * @return the PhraseListOptions object itself.
+ */
+ @Generated
+ public PhraseListOptions setPhrases(List<String> phrases) {
+ this.phrases = phrases;
+ return this;
+ }
+
+ /**
+ * Get the biasingWeight property: Biasing weight for phrase list (1.0 to 20.0).
+ *
+ * @return the biasingWeight value.
+ */
+ @Generated
+ public Double getBiasingWeight() {
+ return this.biasingWeight;
+ }
+
+ /**
+ * Set the biasingWeight property: Biasing weight for phrase list (1.0 to 20.0).
+ *
+ * @param biasingWeight the biasingWeight value to set.
+ * @return the PhraseListOptions object itself.
+ */
+ @Generated
+ public PhraseListOptions setBiasingWeight(Double biasingWeight) {
+ this.biasingWeight = biasingWeight;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeArrayField("phrases", this.phrases, (writer, element) -> writer.writeString(element));
+ jsonWriter.writeNumberField("biasingWeight", this.biasingWeight);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of PhraseListOptions from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of PhraseListOptions if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IOException If an error occurs while reading the PhraseListOptions.
+ */
+ @Generated
+ public static PhraseListOptions fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ PhraseListOptions deserializedPhraseListOptions = new PhraseListOptions();
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("phrases".equals(fieldName)) {
+ List<String> phrases = reader.readArray(reader1 -> reader1.getString());
+ deserializedPhraseListOptions.phrases = phrases;
+ } else if ("biasingWeight".equals(fieldName)) {
+ deserializedPhraseListOptions.biasingWeight = reader.getNullable(JsonReader::getDouble);
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedPhraseListOptions;
+ });
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/ProfanityFilterMode.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/ProfanityFilterMode.java
new file mode 100644
index 000000000000..8031bcadc6ae
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/ProfanityFilterMode.java
@@ -0,0 +1,69 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import java.util.Collection;
+
+/**
+ * Mode of profanity filtering.
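+ *
+ * <p>A minimal usage sketch:</p>
+ * <pre>{@code
+ * ProfanityFilterMode mode = ProfanityFilterMode.fromString("Masked"); // same constant as ProfanityFilterMode.MASKED
+ * }</pre>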
+ */
+ public final class ProfanityFilterMode extends ExpandableStringEnum<ProfanityFilterMode> {
+
+ /**
+ * Disable profanity filtering.
+ */
+ @Generated
+ public static final ProfanityFilterMode NONE = fromString("None");
+
+ /**
+ * Remove profanity.
+ */
+ @Generated
+ public static final ProfanityFilterMode REMOVED = fromString("Removed");
+
+ /**
+ * Add "profanity" XML tags</Profanity>.
+ */
+ @Generated
+ public static final ProfanityFilterMode TAGS = fromString("Tags");
+
+ /**
+ * Mask the profanity with asterisks except for the first letter, e.g., f***.
+ */
+ @Generated
+ public static final ProfanityFilterMode MASKED = fromString("Masked");
+
+ /**
+ * Creates a new instance of ProfanityFilterMode value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public ProfanityFilterMode() {
+ }
+
+ /**
+ * Creates or finds a ProfanityFilterMode from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding ProfanityFilterMode.
+ */
+ @Generated
+ public static ProfanityFilterMode fromString(String name) {
+ return fromString(name, ProfanityFilterMode.class);
+ }
+
+ /**
+ * Gets known ProfanityFilterMode values.
+ *
+ * @return known ProfanityFilterMode values.
+ */
+ @Generated
+ public static Collection<ProfanityFilterMode> values() {
+ return values(ProfanityFilterMode.class);
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscribedPhrase.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscribedPhrase.java
new file mode 100644
index 000000000000..6406e29ed530
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscribedPhrase.java
@@ -0,0 +1,237 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.time.Duration;
+import java.util.List;
+
+/**
+ * A transcribed phrase.
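+ *
+ * <p>A minimal reading sketch (the phrase is assumed to come from a {@link TranscriptionResult}):</p>
+ * <pre>{@code
+ * System.out.printf("[%dms] %s (confidence %.2f)%n", phrase.getOffset(), phrase.getText(), phrase.getConfidence());
+ * if (phrase.getWords() != null) {
+ *     phrase.getWords().forEach(word -> System.out.println(word.getText()));
+ * }
+ * }</pre>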
+ */
+@Immutable
+ public final class TranscribedPhrase implements JsonSerializable<TranscribedPhrase> {
+
+ /*
+ * The 0-based channel index. Only present if channel separation is enabled.
+ */
+ @Generated
+ private Integer channel;
+
+ /*
+ * A unique integer number that is assigned to each speaker detected in the audio without particular order. Only
+ * present if speaker diarization is enabled.
+ */
+ @Generated
+ private Integer speaker;
+
+ /*
+ * The start offset of the phrase in milliseconds.
+ */
+ @Generated
+ private final int offset;
+
+ /*
+ * The duration of the phrase in milliseconds.
+ */
+ @Generated
+ private final int duration;
+
+ /*
+ * The transcribed text of the phrase.
+ */
+ @Generated
+ private final String text;
+
+ /*
+ * The words that make up the phrase. Only present if word-level timestamps are enabled.
+ */
+ @Generated
+ private List<TranscribedWord> words;
+
+ /*
+ * The locale of the phrase.
+ */
+ @Generated
+ private String locale;
+
+ /*
+ * The confidence value for the phrase.
+ */
+ @Generated
+ private final double confidence;
+
+ /**
+ * Creates an instance of TranscribedPhrase class.
+ *
+ * @param offset the offset value to set.
+ * @param duration the duration value to set.
+ * @param text the text value to set.
+ * @param confidence the confidence value to set.
+ */
+ @Generated
+ private TranscribedPhrase(int offset, int duration, String text, double confidence) {
+ this.offset = offset;
+ this.duration = duration;
+ this.text = text;
+ this.confidence = confidence;
+ }
+
+ /**
+ * Get the channel property: The 0-based channel index. Only present if channel separation is enabled.
+ *
+ * @return the channel value.
+ */
+ @Generated
+ public Integer getChannel() {
+ return this.channel;
+ }
+
+ /**
+ * Get the speaker property: A unique integer number that is assigned to each speaker detected in the audio without
+ * particular order. Only present if speaker diarization is enabled.
+ *
+ * @return the speaker value.
+ */
+ @Generated
+ public Integer getSpeaker() {
+ return this.speaker;
+ }
+
+ /**
+ * Get the offset property: The start offset of the phrase in milliseconds.
+ *
+ * @return the offset value.
+ */
+ @Generated
+ public int getOffset() {
+ return this.offset;
+ }
+
+ /**
+ * Get the duration property: The duration of the phrase.
+ *
+ * @return the duration value as a {@link Duration}.
+ */
+ @Generated
+ public Duration getDuration() {
+ return Duration.ofMillis(this.duration);
+ }
+
+ /**
+ * Get the text property: The transcribed text of the phrase.
+ *
+ * @return the text value.
+ */
+ @Generated
+ public String getText() {
+ return this.text;
+ }
+
+ /**
+ * Get the words property: The words that make up the phrase. Only present if word-level timestamps are enabled.
+ *
+ * @return the words value.
+ */
+ @Generated
+ public List<TranscribedWord> getWords() {
+ return this.words;
+ }
+
+ /**
+ * Get the locale property: The locale of the phrase.
+ *
+ * @return the locale value.
+ */
+ @Generated
+ public String getLocale() {
+ return this.locale;
+ }
+
+ /**
+ * Get the confidence property: The confidence value for the phrase.
+ *
+ * @return the confidence value.
+ */
+ @Generated
+ public double getConfidence() {
+ return this.confidence;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeIntField("offsetMilliseconds", this.offset);
+ jsonWriter.writeIntField("durationMilliseconds", this.duration);
+ jsonWriter.writeStringField("text", this.text);
+ jsonWriter.writeDoubleField("confidence", this.confidence);
+ jsonWriter.writeNumberField("channel", this.channel);
+ jsonWriter.writeNumberField("speaker", this.speaker);
+ jsonWriter.writeArrayField("words", this.words, (writer, element) -> writer.writeJson(element));
+ jsonWriter.writeStringField("locale", this.locale);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of TranscribedPhrase from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of TranscribedPhrase if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the TranscribedPhrase.
+ */
+ @Generated
+ public static TranscribedPhrase fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ int offset = 0;
+ int duration = 0;
+ String text = null;
+ double confidence = 0.0;
+ Integer channel = null;
+ Integer speaker = null;
+ List<TranscribedWord> words = null;
+ String locale = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("offsetMilliseconds".equals(fieldName)) {
+ offset = reader.getInt();
+ } else if ("durationMilliseconds".equals(fieldName)) {
+ duration = reader.getInt();
+ } else if ("text".equals(fieldName)) {
+ text = reader.getString();
+ } else if ("confidence".equals(fieldName)) {
+ confidence = reader.getDouble();
+ } else if ("channel".equals(fieldName)) {
+ channel = reader.getNullable(JsonReader::getInt);
+ } else if ("speaker".equals(fieldName)) {
+ speaker = reader.getNullable(JsonReader::getInt);
+ } else if ("words".equals(fieldName)) {
+ words = reader.readArray(reader1 -> TranscribedWord.fromJson(reader1));
+ } else if ("locale".equals(fieldName)) {
+ locale = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ TranscribedPhrase deserializedTranscribedPhrase = new TranscribedPhrase(offset, duration, text, confidence);
+ deserializedTranscribedPhrase.channel = channel;
+ deserializedTranscribedPhrase.speaker = speaker;
+ deserializedTranscribedPhrase.words = words;
+ deserializedTranscribedPhrase.locale = locale;
+ return deserializedTranscribedPhrase;
+ });
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscribedWord.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscribedWord.java
new file mode 100644
index 000000000000..a0c046e15cbe
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscribedWord.java
@@ -0,0 +1,127 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.time.Duration;
+
+/**
+ * Time-stamped word in the display form.
+ */
+@Immutable
+ public final class TranscribedWord implements JsonSerializable<TranscribedWord> {
+
+ /*
+ * The recognized word, including punctuation.
+ */
+ @Generated
+ private final String text;
+
+ /*
+ * The start offset of the word in milliseconds.
+ */
+ @Generated
+ private final int offset;
+
+ /*
+ * The duration of the word in milliseconds.
+ */
+ @Generated
+ private final int duration;
+
+ /**
+ * Creates an instance of TranscribedWord class.
+ *
+ * @param text the text value to set.
+ * @param offset the offset value to set.
+ * @param duration the duration value to set.
+ */
+ @Generated
+ private TranscribedWord(String text, int offset, int duration) {
+ this.text = text;
+ this.offset = offset;
+ this.duration = duration;
+ }
+
+ /**
+ * Get the text property: The recognized word, including punctuation.
+ *
+ * @return the text value.
+ */
+ @Generated
+ public String getText() {
+ return this.text;
+ }
+
+ /**
+ * Get the offset property: The start offset of the word in milliseconds.
+ *
+ * @return the offset value.
+ */
+ @Generated
+ public int getOffset() {
+ return this.offset;
+ }
+
+ /**
+ * Get the duration property: The duration of the word.
+ *
+ * @return the duration value as a {@link Duration}.
+ */
+ @Generated
+ public Duration getDuration() {
+ return Duration.ofMillis(this.duration);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("text", this.text);
+ jsonWriter.writeIntField("offsetMilliseconds", this.offset);
+ jsonWriter.writeIntField("durationMilliseconds", this.duration);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of TranscribedWord from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of TranscribedWord if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the TranscribedWord.
+ */
+ @Generated
+ public static TranscribedWord fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String text = null;
+ int offset = 0;
+ int duration = 0;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("text".equals(fieldName)) {
+ text = reader.getString();
+ } else if ("offsetMilliseconds".equals(fieldName)) {
+ offset = reader.getInt();
+ } else if ("durationMilliseconds".equals(fieldName)) {
+ duration = reader.getInt();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new TranscribedWord(text, offset, duration);
+ });
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionContent.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionContent.java
new file mode 100644
index 000000000000..5ba64e48e89e
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionContent.java
@@ -0,0 +1,73 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+
+/**
+ * Request model for transcription operation.
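+ *
+ * <p>A minimal construction sketch (the file path is hypothetical):</p>
+ * <pre>{@code
+ * AudioFileDetails audio = new AudioFileDetails(BinaryData.fromFile(Paths.get("speech.wav")));
+ * TranscriptionContent content = new TranscriptionContent(new TranscriptionOptions(audio)).setAudio(audio);
+ * }</pre>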
+ */
+@Fluent
+public final class TranscriptionContent {
+
+ /*
+ * The content of the audio file to be transcribed. The audio file must be shorter than 2 hours in audio duration
+ * and smaller than 250 MB in size. Optional if audioUrl is provided in the definition.
+ */
+ @Generated
+ private AudioFileDetails audio;
+
+ /**
+ * Get the audio property: The content of the audio file to be transcribed. The audio file must be shorter than 2
+ * hours in audio duration and smaller than 250 MB in size. Optional if audioUrl is provided in the definition.
+ *
+ * @return the audio value.
+ */
+ @Generated
+ public AudioFileDetails getAudio() {
+ return this.audio;
+ }
+
+ /**
+ * Set the audio property: The content of the audio file to be transcribed. The audio file must be shorter than 2
+ * hours in audio duration and smaller than 250 MB in size. Optional if audioUrl is provided in the definition.
+ *
+ * @param audio the audio value to set.
+ * @return the TranscriptionContent object itself.
+ */
+ @Generated
+ public TranscriptionContent setAudio(AudioFileDetails audio) {
+ this.audio = audio;
+ return this;
+ }
+
+ /*
+ * Metadata for a transcription request. This field contains a JSON-serialized object of type
+ * `TranscriptionOptions`.
+ */
+ @Generated
+ private final TranscriptionOptions options;
+
+ /**
+ * Creates an instance of TranscriptionContent class.
+ *
+ * @param options the options value to set.
+ */
+ @Generated
+ public TranscriptionContent(TranscriptionOptions options) {
+ this.options = options;
+ }
+
+ /**
+ * Get the options property: Metadata for a transcription request. This field contains a JSON-serialized object of
+ * type `TranscriptionOptions`.
+ *
+ * @return the options value.
+ */
+ @Generated
+ public TranscriptionOptions getOptions() {
+ return this.options;
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionDiarizationOptions.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionDiarizationOptions.java
new file mode 100644
index 000000000000..831c546c31ed
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionDiarizationOptions.java
@@ -0,0 +1,115 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * The Speaker Diarization settings. Diarization settings must be specified to enable speaker diarization.
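+ *
+ * <p>A minimal sketch hinting at up to three speakers:</p>
+ * <pre>{@code
+ * TranscriptionDiarizationOptions diarization = new TranscriptionDiarizationOptions().setMaxSpeakers(3);
+ * }</pre>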
+ */
+@Fluent
+ public final class TranscriptionDiarizationOptions implements JsonSerializable<TranscriptionDiarizationOptions> {
+
+ /*
+ * Enable speaker diarization. This is automatically set to true when maxSpeakers is specified.
+ */
+ @Generated
+ private Boolean enabled;
+
+ /*
+ * Gets or sets a hint for the maximum number of speakers for diarization. Must be greater than 1 and less than 36.
+ */
+ @Generated
+ private Integer maxSpeakers;
+
+ /**
+ * Creates an instance of TranscriptionDiarizationOptions class.
+ */
+ @Generated
+ public TranscriptionDiarizationOptions() {
+ }
+
+ /**
+ * Get the enabled property: Enable speaker diarization. This is automatically set to true when maxSpeakers is
+ * specified.
+ *
+ * @return the enabled value.
+ */
+ @Generated
+ public Boolean isEnabled() {
+ return this.enabled;
+ }
+
+ /**
+ * Get the maxSpeakers property: Gets or sets a hint for the maximum number of speakers for diarization. Must be
+ * greater than 1 and less than 36.
+ *
+ * @return the maxSpeakers value.
+ */
+ @Generated
+ public Integer getMaxSpeakers() {
+ return this.maxSpeakers;
+ }
+
+ /**
+ * Set the maxSpeakers property: Gets or sets a hint for the maximum number of speakers for diarization. Must be
+ * greater than 1 and less than 36.
+ *
+ * @param maxSpeakers the maxSpeakers value to set.
+ * @return the TranscriptionDiarizationOptions object itself.
+ */
+ @Generated
+ public TranscriptionDiarizationOptions setMaxSpeakers(Integer maxSpeakers) {
+ this.maxSpeakers = maxSpeakers;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ if (this.maxSpeakers != null) {
+ jsonWriter.writeBooleanField("enabled", true);
+ jsonWriter.writeNumberField("maxSpeakers", this.maxSpeakers);
+ }
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of TranscriptionDiarizationOptions from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of TranscriptionDiarizationOptions if the JsonReader was pointing to an instance of it, or
+ * null if it was pointing to JSON null.
+ * @throws IOException If an error occurs while reading the TranscriptionDiarizationOptions.
+ */
+ @Generated
+ public static TranscriptionDiarizationOptions fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ TranscriptionDiarizationOptions deserializedTranscriptionDiarizationOptions
+ = new TranscriptionDiarizationOptions();
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("enabled".equals(fieldName)) {
+ deserializedTranscriptionDiarizationOptions.enabled = reader.getNullable(JsonReader::getBoolean);
+ } else if ("maxSpeakers".equals(fieldName)) {
+ deserializedTranscriptionDiarizationOptions.maxSpeakers = reader.getNullable(JsonReader::getInt);
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedTranscriptionDiarizationOptions;
+ });
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionOptions.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionOptions.java
new file mode 100644
index 000000000000..2b298d84d59a
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionOptions.java
@@ -0,0 +1,350 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Metadata for a transcription request.
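+ *
+ * <p>A minimal configuration sketch (the URL is illustrative):</p>
+ * <pre>{@code
+ * TranscriptionOptions options = new TranscriptionOptions("https://example.org/audio.wav")
+ *     .setLocales(Arrays.asList("en-US", "es-ES"))
+ *     .setDiarizationOptions(new TranscriptionDiarizationOptions().setMaxSpeakers(2));
+ * }</pre>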
+ */
+@Fluent
+ public final class TranscriptionOptions implements JsonSerializable<TranscriptionOptions> {
+
+ /*
+ * The URL of the audio to be transcribed. The audio must be shorter than 2 hours in audio duration and smaller than
+ * 250 MB in size. If both Audio and AudioUrl are provided, Audio is used.
+ */
+ @Generated
+ private String audioUrl;
+
+ /*
+ * A list of possible locales for the transcription. If not specified, the locale of the speech in the audio is
+ * detected automatically from all supported locales.
+ */
+ @Generated
+ private List<String> locales;
+
+ /*
+ * Maps some or all candidate locales to a model URI to be used for transcription. If no mapping is given, the
+ * default model for the locale is used.
+ */
+ @Generated
+ private Map<String, String> localeModelMapping;
+
+ /*
+ * Mode of profanity filtering.
+ */
+ @Generated
+ private ProfanityFilterMode profanityFilterMode;
+
+ /*
+ * Mode of diarization.
+ */
+ @Generated
+ private TranscriptionDiarizationOptions diarizationOptions;
+
+ /*
+ * The 0-based indices of the channels to be transcribed separately. If not specified, multiple channels are merged
+ * and transcribed jointly. Only up to two channels are supported.
+ */
+ @Generated
+ private List<Integer> activeChannels;
+
+ /*
+ * Enhanced mode properties.
+ */
+ @Generated
+ private EnhancedModeOptions enhancedModeOptions;
+
+ /*
+ * Phrase list properties.
+ */
+ @Generated
+ private PhraseListOptions phraseListOptions;
+
+ /**
+ * Get the audioUrl property: The URL of the audio to be transcribed. The audio must be shorter than 2 hours in
+ * audio duration and smaller than 250 MB in size. If both Audio and AudioUrl are provided, Audio is used.
+ *
+ * @return the audioUrl value.
+ */
+ @Generated
+ public String getAudioUrl() {
+ return this.audioUrl;
+ }
+
+ /**
+ * Set the audioUrl property: The URL of the audio to be transcribed. The audio must be shorter than 2 hours in
+ * audio duration and smaller than 250 MB in size. If both Audio and AudioUrl are provided, Audio is used.
+ *
+ * @param audioUrl the audioUrl value to set.
+ * @return the TranscriptionOptions object itself.
+ */
+ @Generated
+ private TranscriptionOptions setAudioUrl(String audioUrl) {
+ this.audioUrl = audioUrl;
+ return this;
+ }
+
+ /**
+ * Get the locales property: A list of possible locales for the transcription. If not specified, the locale of the
+ * speech in the audio is detected automatically from all supported locales.
+ *
+ * @return the locales value.
+ */
+ @Generated
+ public List<String> getLocales() {
+ return this.locales;
+ }
+
+ /**
+ * Set the locales property: A list of possible locales for the transcription. If not specified, the locale of the
+ * speech in the audio is detected automatically from all supported locales.
+ *
+ * @param locales the locales value to set.
+ * @return the TranscriptionOptions object itself.
+ */
+ @Generated
+ public TranscriptionOptions setLocales(List<String> locales) {
+ this.locales = locales;
+ return this;
+ }
+
+ /**
+ * Get the localeModelMapping property: Maps some or all candidate locales to a model URI to be used for
+ * transcription. If no mapping is given, the default model for the locale is used.
+ *
+ * @return the localeModelMapping value.
+ */
+ @Generated
+ public Map<String, String> getLocaleModelMapping() {
+ return this.localeModelMapping;
+ }
+
+ /**
+ * Set the localeModelMapping property: Maps some or all candidate locales to a model URI to be used for
+ * transcription. If no mapping is given, the default model for the locale is used.
+ *
+ * @param localeModelMapping the localeModelMapping value to set.
+ * @return the TranscriptionOptions object itself.
+ */
+ @Generated
+ public TranscriptionOptions setLocaleModelMapping(Map<String, String> localeModelMapping) {
+ this.localeModelMapping = localeModelMapping;
+ return this;
+ }
+
+ /**
+ * Get the profanityFilterMode property: Mode of profanity filtering.
+ *
+ * @return the profanityFilterMode value.
+ */
+ @Generated
+ public ProfanityFilterMode getProfanityFilterMode() {
+ return this.profanityFilterMode;
+ }
+
+ /**
+ * Set the profanityFilterMode property: Mode of profanity filtering.
+ *
+ * @param profanityFilterMode the profanityFilterMode value to set.
+ * @return the TranscriptionOptions object itself.
+ */
+ @Generated
+ public TranscriptionOptions setProfanityFilterMode(ProfanityFilterMode profanityFilterMode) {
+ this.profanityFilterMode = profanityFilterMode;
+ return this;
+ }
+
+ /**
+ * Get the diarizationOptions property: Mode of diarization.
+ *
+ * @return the diarizationOptions value.
+ */
+ @Generated
+ public TranscriptionDiarizationOptions getDiarizationOptions() {
+ return this.diarizationOptions;
+ }
+
+ /**
+ * Set the diarizationOptions property: Mode of diarization.
+ *
+ * @param diarizationOptions the diarizationOptions value to set.
+ * @return the TranscriptionOptions object itself.
+ */
+ @Generated
+ public TranscriptionOptions setDiarizationOptions(TranscriptionDiarizationOptions diarizationOptions) {
+ this.diarizationOptions = diarizationOptions;
+ return this;
+ }
+
+ /**
+ * Get the activeChannels property: The 0-based indices of the channels to be transcribed separately. If not
+ * specified, multiple channels are merged and transcribed jointly. Only up to two channels are supported.
+ *
+ * @return the activeChannels value.
+ */
+ @Generated
+ public List<Integer> getActiveChannels() {
+ return this.activeChannels;
+ }
+
+ /**
+ * Set the activeChannels property: The 0-based indices of the channels to be transcribed separately. If not
+ * specified, multiple channels are merged and transcribed jointly. Only up to two channels are supported.
+ *
+ * @param activeChannels the activeChannels value to set.
+ * @return the TranscriptionOptions object itself.
+ */
+ @Generated
+ public TranscriptionOptions setActiveChannels(List<Integer> activeChannels) {
+ this.activeChannels = activeChannels;
+ return this;
+ }
+
+ /**
+ * Get the enhancedModeOptions property: Enhanced mode properties.
+ *
+ * @return the enhancedModeOptions value.
+ */
+ @Generated
+ public EnhancedModeOptions getEnhancedModeOptions() {
+ return this.enhancedModeOptions;
+ }
+
+ /**
+ * Set the enhancedModeOptions property: Enhanced mode properties.
+ *
+ * @param enhancedModeOptions the enhancedModeOptions value to set.
+ * @return the TranscriptionOptions object itself.
+ */
+ @Generated
+ public TranscriptionOptions setEnhancedModeOptions(EnhancedModeOptions enhancedModeOptions) {
+ this.enhancedModeOptions = enhancedModeOptions;
+ return this;
+ }
+
+ /**
+ * Get the phraseListOptions property: Phrase list properties.
+ *
+ * @return the phraseListOptions value.
+ */
+ @Generated
+ public PhraseListOptions getPhraseListOptions() {
+ return this.phraseListOptions;
+ }
+
+ /**
+ * Set the phraseListOptions property: Phrase list properties.
+ *
+ * @param phraseListOptions the phraseListOptions value to set.
+ * @return the TranscriptionOptions object itself.
+ */
+ @Generated
+ public TranscriptionOptions setPhraseListOptions(PhraseListOptions phraseListOptions) {
+ this.phraseListOptions = phraseListOptions;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("audioUrl", this.audioUrl);
+ jsonWriter.writeArrayField("locales", this.locales, (writer, element) -> writer.writeString(element));
+ jsonWriter.writeMapField("models", this.localeModelMapping, (writer, element) -> writer.writeString(element));
+ jsonWriter.writeStringField("profanityFilterMode",
+ this.profanityFilterMode == null ? null : this.profanityFilterMode.toString());
+ jsonWriter.writeJsonField("diarization", this.diarizationOptions);
+ jsonWriter.writeArrayField("channels", this.activeChannels, (writer, element) -> writer.writeInt(element));
+ jsonWriter.writeJsonField("enhancedMode", this.enhancedModeOptions);
+ jsonWriter.writeJsonField("phraseList", this.phraseListOptions);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of TranscriptionOptions from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of TranscriptionOptions if the JsonReader was pointing to an instance of it, or null if it
+ * was pointing to JSON null.
+ * @throws IOException If an error occurs while reading the TranscriptionOptions.
+ */
+ @Generated
+ public static TranscriptionOptions fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ TranscriptionOptions deserializedTranscriptionOptions = new TranscriptionOptions((String) null);
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("audioUrl".equals(fieldName)) {
+ deserializedTranscriptionOptions.audioUrl = reader.getString();
+ } else if ("locales".equals(fieldName)) {
+ List<String> locales = reader.readArray(reader1 -> reader1.getString());
+ deserializedTranscriptionOptions.locales = locales;
+ } else if ("localeModelMapping".equals(fieldName)) {
+ Map localeModelMapping = reader.readMap(reader1 -> reader1.getString());
+ deserializedTranscriptionOptions.localeModelMapping = localeModelMapping;
+ } else if ("profanityFilterMode".equals(fieldName)) {
+ deserializedTranscriptionOptions.profanityFilterMode
+ = ProfanityFilterMode.fromString(reader.getString());
+ } else if ("diarization".equals(fieldName)) {
+ deserializedTranscriptionOptions.diarizationOptions
+ = TranscriptionDiarizationOptions.fromJson(reader);
+ } else if ("channels".equals(fieldName)) {
+ List<Integer> activeChannels = reader.readArray(reader1 -> reader1.getInt());
+ deserializedTranscriptionOptions.activeChannels = activeChannels;
+ } else if ("enhancedMode".equals(fieldName)) {
+ deserializedTranscriptionOptions.enhancedModeOptions = EnhancedModeOptions.fromJson(reader);
+ } else if ("phraseList".equals(fieldName)) {
+ deserializedTranscriptionOptions.phraseListOptions = PhraseListOptions.fromJson(reader);
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedTranscriptionOptions;
+ });
+ }
+
+ private final AudioFileDetails audioFileDetails;
+
+ /**
+ * Creates an instance of TranscriptionOptions class with audio URL.
+ *
+ * @param audioUrl the URL of the audio to be transcribed
+ */
+ public TranscriptionOptions(String audioUrl) {
+ this.audioUrl = audioUrl;
+ this.audioFileDetails = null;
+ }
+
+ /**
+ * Creates an instance of TranscriptionOptions class with audio file details.
+ *
+ * @param fileDetails the audio file details
+ */
+ public TranscriptionOptions(AudioFileDetails fileDetails) {
+ this.audioFileDetails = fileDetails;
+ }
+
+ /**
+ * Get the audioFileDetails property: The audio file details for transcription.
+ *
+ * @return the audioFileDetails value.
+ */
+ public AudioFileDetails getFileDetails() {
+ return this.audioFileDetails;
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionResult.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionResult.java
new file mode 100644
index 000000000000..12d5a00d20a1
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/TranscriptionResult.java
@@ -0,0 +1,130 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.speech.transcription.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.time.Duration;
+import java.util.List;
+
+/**
+ * The result of the transcribe operation.
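+ *
+ * <p>A minimal reading sketch (the result is assumed to come from a transcribe call):</p>
+ * <pre>{@code
+ * for (ChannelCombinedPhrases channel : result.getCombinedPhrases()) {
+ *     System.out.println(channel.getText());
+ * }
+ * }</pre>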
+ */
+@Immutable
+ public final class TranscriptionResult implements JsonSerializable<TranscriptionResult> {
+
+ /*
+ * The duration of the audio in milliseconds.
+ */
+ @Generated
+ private final int duration;
+
+ /*
+ * The full transcript for each channel.
+ */
+ @Generated
+ private final List<ChannelCombinedPhrases> combinedPhrases;
+
+ /*
+ * The transcription results segmented into phrases.
+ */
+ @Generated
+ private final List<TranscribedPhrase> phrases;
+
+ /**
+ * Creates an instance of TranscriptionResult class.
+ *
+ * @param duration the duration value to set.
+ * @param combinedPhrases the combinedPhrases value to set.
+ * @param phrases the phrases value to set.
+ */
+ @Generated
+ private TranscriptionResult(int duration, List<ChannelCombinedPhrases> combinedPhrases,
+ List<TranscribedPhrase> phrases) {
+ this.duration = duration;
+ this.combinedPhrases = combinedPhrases;
+ this.phrases = phrases;
+ }
+
+ /**
+ * Get the duration property: The duration of the audio.
+ *
+ * @return the duration value as a {@link Duration}.
+ */
+ @Generated
+ public Duration getDuration() {
+ return Duration.ofMillis(this.duration);
+ }
+
+ /**
+ * Get the combinedPhrases property: The full transcript for each channel.
+ *
+ * @return the combinedPhrases value.
+ */
+ @Generated
+ public List<ChannelCombinedPhrases> getCombinedPhrases() {
+ return this.combinedPhrases;
+ }
+
+ /**
+ * Get the phrases property: The transcription results segmented into phrases.
+ *
+ * @return the phrases value.
+ */
+ @Generated
+ public List<TranscribedPhrase> getPhrases() {
+ return this.phrases;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeIntField("durationMilliseconds", this.duration);
+ jsonWriter.writeArrayField("combinedPhrases", this.combinedPhrases,
+ (writer, element) -> writer.writeJson(element));
+ jsonWriter.writeArrayField("phrases", this.phrases, (writer, element) -> writer.writeJson(element));
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of TranscriptionResult from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of TranscriptionResult if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the TranscriptionResult.
+ */
+ @Generated
+ public static TranscriptionResult fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ int duration = 0;
+ List<ChannelCombinedPhrases> combinedPhrases = null;
+ List<TranscribedPhrase> phrases = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("durationMilliseconds".equals(fieldName)) {
+ duration = reader.getInt();
+ } else if ("combinedPhrases".equals(fieldName)) {
+ combinedPhrases = reader.readArray(reader1 -> ChannelCombinedPhrases.fromJson(reader1));
+ } else if ("phrases".equals(fieldName)) {
+ phrases = reader.readArray(reader1 -> TranscribedPhrase.fromJson(reader1));
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new TranscriptionResult(duration, combinedPhrases, phrases);
+ });
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/package-info.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/package-info.java
new file mode 100644
index 000000000000..f1ff4db6a641
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/models/package-info.java
@@ -0,0 +1,9 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+/**
+ *
+ * Package containing the data models for Transcription.
+ *
+ */
+package com.azure.ai.speech.transcription.models;
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/package-info.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/package-info.java
new file mode 100644
index 000000000000..8e50f3224f2d
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/com/azure/ai/speech/transcription/package-info.java
@@ -0,0 +1,9 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+/**
+ *
+ * Package containing the classes for Transcription.
+ *
+ */
+package com.azure.ai.speech.transcription;
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/java/module-info.java b/sdk/transcription/azure-ai-speech-transcription/src/main/java/module-info.java
new file mode 100644
index 000000000000..434a8672d9f3
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/java/module-info.java
@@ -0,0 +1,12 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+module com.azure.ai.speech.transcription {
+ requires transitive com.azure.core;
+
+ exports com.azure.ai.speech.transcription;
+ exports com.azure.ai.speech.transcription.models;
+
+ opens com.azure.ai.speech.transcription.models to com.azure.core;
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/resources/META-INF/azure-ai-speech-transcription_apiview_properties.json b/sdk/transcription/azure-ai-speech-transcription/src/main/resources/META-INF/azure-ai-speech-transcription_apiview_properties.json
new file mode 100644
index 000000000000..e70a697123f8
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/resources/META-INF/azure-ai-speech-transcription_apiview_properties.json
@@ -0,0 +1,23 @@
+{
+ "flavor": "azure",
+ "CrossLanguageDefinitionId": {
+ "com.azure.ai.speech.transcription.TranscriptionAsyncClient": "Azure.Ai.Speech.Transcription",
+ "com.azure.ai.speech.transcription.TranscriptionAsyncClient.transcribe": "Azure.Ai.Speech.Transcription.transcribe",
+ "com.azure.ai.speech.transcription.TranscriptionAsyncClient.transcribeWithResponse": "Azure.Ai.Speech.Transcription.transcribe",
+ "com.azure.ai.speech.transcription.TranscriptionClient": "Azure.Ai.Speech.Transcription",
+ "com.azure.ai.speech.transcription.TranscriptionClient.transcribe": "Azure.Ai.Speech.Transcription.transcribe",
+ "com.azure.ai.speech.transcription.TranscriptionClient.transcribeWithResponse": "Azure.Ai.Speech.Transcription.transcribe",
+ "com.azure.ai.speech.transcription.TranscriptionClientBuilder": "Azure.Ai.Speech.Transcription",
+ "com.azure.ai.speech.transcription.models.AudioFileDetails": null,
+ "com.azure.ai.speech.transcription.models.ChannelCombinedPhrases": "Azure.Ai.Speech.Transcription.ChannelCombinedPhrases",
+ "com.azure.ai.speech.transcription.models.EnhancedModeOptions": "Azure.Ai.Speech.Transcription.EnhancedModeProperties",
+ "com.azure.ai.speech.transcription.models.PhraseListOptions": "Azure.Ai.Speech.Transcription.PhraseListProperties",
+ "com.azure.ai.speech.transcription.models.ProfanityFilterMode": "Azure.Ai.Speech.Transcription.ProfanityFilterMode",
+ "com.azure.ai.speech.transcription.models.TranscribedPhrase": "Azure.Ai.Speech.Transcription.TranscribedPhrase",
+ "com.azure.ai.speech.transcription.models.TranscribedWord": "Azure.Ai.Speech.Transcription.TranscribedWord",
+ "com.azure.ai.speech.transcription.models.TranscriptionContent": "Azure.Ai.Speech.Transcription.TranscriptionContent",
+ "com.azure.ai.speech.transcription.models.TranscriptionDiarizationOptions": "Azure.Ai.Speech.Transcription.TranscriptionDiarizationOptions",
+ "com.azure.ai.speech.transcription.models.TranscriptionOptions": "Azure.Ai.Speech.Transcription.TranscriptionOptions",
+ "com.azure.ai.speech.transcription.models.TranscriptionResult": "Azure.Ai.Speech.Transcription.TranscriptionResult"
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/resources/META-INF/azure-ai-speech-transcription_metadata.json b/sdk/transcription/azure-ai-speech-transcription/src/main/resources/META-INF/azure-ai-speech-transcription_metadata.json
new file mode 100644
index 000000000000..7ff97cdfa258
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/resources/META-INF/azure-ai-speech-transcription_metadata.json
@@ -0,0 +1 @@
+{"flavor":"azure","apiVersion":"2025-10-15","crossLanguageDefinitions":{"com.azure.ai.speech.transcription.TranscriptionAsyncClient":"Azure.Ai.Speech.Transcription","com.azure.ai.speech.transcription.TranscriptionAsyncClient.transcribe":"Azure.Ai.Speech.Transcription.transcribe","com.azure.ai.speech.transcription.TranscriptionAsyncClient.transcribeWithResponse":"Azure.Ai.Speech.Transcription.transcribe","com.azure.ai.speech.transcription.TranscriptionClient":"Azure.Ai.Speech.Transcription","com.azure.ai.speech.transcription.TranscriptionClient.transcribe":"Azure.Ai.Speech.Transcription.transcribe","com.azure.ai.speech.transcription.TranscriptionClient.transcribeWithResponse":"Azure.Ai.Speech.Transcription.transcribe","com.azure.ai.speech.transcription.TranscriptionClientBuilder":"Azure.Ai.Speech.Transcription","com.azure.ai.speech.transcription.models.AudioFileDetails":null,"com.azure.ai.speech.transcription.models.ChannelCombinedPhrases":"Azure.Ai.Speech.Transcription.ChannelCombinedPhrases","com.azure.ai.speech.transcription.models.EnhancedModeOptions":"Azure.Ai.Speech.Transcription.EnhancedModeProperties","com.azure.ai.speech.transcription.models.PhraseListOptions":"Azure.Ai.Speech.Transcription.PhraseListProperties","com.azure.ai.speech.transcription.models.ProfanityFilterMode":"Azure.Ai.Speech.Transcription.ProfanityFilterMode","com.azure.ai.speech.transcription.models.TranscribedPhrase":"Azure.Ai.Speech.Transcription.TranscribedPhrase","com.azure.ai.speech.transcription.models.TranscribedWord":"Azure.Ai.Speech.Transcription.TranscribedWord","com.azure.ai.speech.transcription.models.TranscriptionContent":"Azure.Ai.Speech.Transcription.TranscriptionContent","com.azure.ai.speech.transcription.models.TranscriptionDiarizationOptions":"Azure.Ai.Speech.Transcription.TranscriptionDiarizationOptions","com.azure.ai.speech.transcription.models.TranscriptionOptions":"Azure.Ai.Speech.Transcription.TranscriptionOptions","com.azure.ai.speech.transcription.models.TranscriptionResult":"Azure.Ai.Speech.Transcription.TranscriptionResult"},"generatedFiles":["src/main/java/com/azure/ai/speech/transcription/TranscriptionAsyncClient.java","src/main/java/com/azure/ai/speech/transcription/TranscriptionClient.java","src/main/java/com/azure/ai/speech/transcription/TranscriptionClientBuilder.java","src/main/java/com/azure/ai/speech/transcription/TranscriptionServiceVersion.java","src/main/java/com/azure/ai/speech/transcription/implementation/MultipartFormDataHelper.java","src/main/java/com/azure/ai/speech/transcription/implementation/TranscriptionClientImpl.java","src/main/java/com/azure/ai/speech/transcription/implementation/package-info.java","src/main/java/com/azure/ai/speech/transcription/models/AudioFileDetails.java","src/main/java/com/azure/ai/speech/transcription/models/ChannelCombinedPhrases.java","src/main/java/com/azure/ai/speech/transcription/models/EnhancedModeOptions.java","src/main/java/com/azure/ai/speech/transcription/models/PhraseListOptions.java","src/main/java/com/azure/ai/speech/transcription/models/ProfanityFilterMode.java","src/main/java/com/azure/ai/speech/transcription/models/TranscribedPhrase.java","src/main/java/com/azure/ai/speech/transcription/models/TranscribedWord.java","src/main/java/com/azure/ai/speech/transcription/models/TranscriptionContent.java","src/main/java/com/azure/ai/speech/transcription/models/TranscriptionDiarizationOptions.java","src/main/java/com/azure/ai/speech/transcription/models/TranscriptionOptions.java","src/main/java/com/azure/ai/speech/transcription/models/T
ranscriptionResult.java","src/main/java/com/azure/ai/speech/transcription/models/package-info.java","src/main/java/com/azure/ai/speech/transcription/package-info.java","src/main/java/module-info.java"]}
\ No newline at end of file
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/main/resources/azure-ai-speech-transcription.properties b/sdk/transcription/azure-ai-speech-transcription/src/main/resources/azure-ai-speech-transcription.properties
new file mode 100644
index 000000000000..ca812989b4f2
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/main/resources/azure-ai-speech-transcription.properties
@@ -0,0 +1,2 @@
+name=${project.artifactId}
+version=${project.version}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/assets/sample-audio.wav b/sdk/transcription/azure-ai-speech-transcription/src/samples/assets/sample-audio.wav
new file mode 100644
index 000000000000..bf23d54b0c00
Binary files /dev/null and b/sdk/transcription/azure-ai-speech-transcription/src/samples/assets/sample-audio.wav differ
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/assets/sample-profanity.wav b/sdk/transcription/azure-ai-speech-transcription/src/samples/assets/sample-profanity.wav
new file mode 100644
index 000000000000..e1926b3f5dcf
Binary files /dev/null and b/sdk/transcription/azure-ai-speech-transcription/src/samples/assets/sample-profanity.wav differ
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/EnhancedModeSample.java b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/EnhancedModeSample.java
new file mode 100644
index 000000000000..7abd8b0fc543
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/EnhancedModeSample.java
@@ -0,0 +1,146 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.speech.transcription;
+
+// BEGIN: com.azure.ai.speech.transcription.enhancedmode.imports
+import com.azure.ai.speech.transcription.models.AudioFileDetails;
+import com.azure.ai.speech.transcription.models.EnhancedModeOptions;
+import com.azure.ai.speech.transcription.models.ProfanityFilterMode;
+import com.azure.ai.speech.transcription.models.TranscriptionDiarizationOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import com.azure.core.util.BinaryData;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+// END: com.azure.ai.speech.transcription.enhancedmode.imports
+
+/**
+ * Sample demonstrates how to use EnhancedModeOptions for LLM-enhanced speech transcription,
+ * combining multiple features for optimal transcription quality.
+ *
+ * This sample shows:
+ * - Using lexical format prompts to guide LLM output
+ * - Providing domain-specific context for technical terminology
+ * - Enabling diarization (speaker identification) with enhanced mode
+ * - Configuring profanity filtering
+ *
+ * Enhanced mode leverages large language models to improve transcription quality
+ * by understanding context and domain-specific terminology.
+ */
+public class EnhancedModeSample {
+ /**
+ * Main method to run the enhanced mode sample.
+ *
+ * @param args command line arguments (not used)
+ */
+ public static void main(String[] args) {
+ String endpoint = System.getenv("SPEECH_ENDPOINT");
+ String apiKey = System.getenv("SPEECH_API_KEY");
+
+ if (endpoint == null || apiKey == null) {
+ System.err.println("Please set SPEECH_ENDPOINT and SPEECH_API_KEY environment variables");
+ System.err.println("Example:");
+ System.err.println(" set SPEECH_ENDPOINT=https://your-resource-name.cognitiveservices.azure.com/");
+ System.err.println(" set SPEECH_API_KEY=your-api-key");
+ return;
+ }
+
+ System.out.println("Azure AI Speech Transcription - Enhanced Mode Sample");
+ System.out.println("=====================================================\n");
+
+ // Demonstrate full enhanced mode with all features combined
+ demonstrateFullEnhancedMode(endpoint, apiKey);
+ }
+
+ /**
+ * Demonstrates using full enhanced mode with multiple features combined.
+ * This shows how to use lexical format prompts, domain context, diarization,
+ * and profanity filtering together for optimal transcription quality.
+ */
+ private static void demonstrateFullEnhancedMode(String endpoint, String apiKey) {
+ System.out.println("Enhanced Mode with Multiple Features Combined");
+ System.out.println("----------------------------------------------");
+
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(apiKey))
+ .buildClient();
+
+ try {
+ String audioFilePath = "src/samples/assets/sample-audio.wav";
+ if (!Files.exists(Paths.get(audioFilePath))) {
+ System.out.println("Audio file not found: " + audioFilePath);
+ System.out.println(" Skipping this example.\n");
+ return;
+ }
+
+ byte[] audioData = Files.readAllBytes(Paths.get(audioFilePath));
+
+ // Use the helper method to demonstrate full configuration
+ TranscriptionResult result = transcribeWithFullEnhancedMode(client, audioData, audioFilePath);
+
+ System.out.println(" Full enhanced mode configuration applied");
+ System.out.println("Features: LLM prompts, diarization, profanity filtering");
+ System.out.println("Duration: " + result.getDuration() + " ms");
+ if (result.getCombinedPhrases() != null && !result.getCombinedPhrases().isEmpty()) {
+ System.out.println("\nTranscription: " + result.getCombinedPhrases().get(0).getText());
+ }
+ if (result.getPhrases() != null && !result.getPhrases().isEmpty()) {
+ System.out.println("\nPhrases with speakers:");
+ result.getPhrases().forEach(phrase ->
+ System.out.println(" [Speaker " + phrase.getSpeaker() + ", "
+ + phrase.getOffset() + " ms] " + phrase.getText())
+ );
+ }
+ System.out.println();
+
+ } catch (IOException e) {
+ System.err.println("Error reading audio file: " + e.getMessage() + "\n");
+ } catch (Exception e) {
+ System.err.println("Error during transcription: " + e.getMessage() + "\n");
+ }
+ }
+
+ /**
+ * Helper method demonstrating how to combine all enhanced mode features.
+ * This is a reusable pattern for high-quality LLM-enhanced transcription.
+ */
+ // BEGIN: com.azure.ai.speech.transcription.enhancedmode.complete
+ private static TranscriptionResult transcribeWithFullEnhancedMode(
+ TranscriptionClient client,
+ byte[] audioData,
+ String filename
+ ) throws Exception {
+ // Create audio file details
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Configure comprehensive LLM-enhanced mode settings
+ // Enhanced mode is automatically enabled when you create EnhancedModeOptions
+ // Always include lexical format prompt for best results
+ EnhancedModeOptions enhancedMode = new EnhancedModeOptions()
+ .setTask("transcribe")
+ .setPrompts(Arrays.asList(
+ "Output must be in lexical format."
+ ));
+
+ // Enable diarization for speaker identification
+ TranscriptionDiarizationOptions diarizationOptions = new TranscriptionDiarizationOptions()
+ .setMaxSpeakers(5);
+
+ // Create transcription options with all features enabled
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setLocales(Arrays.asList()) // empty list: no locale hints, so the service auto-detects the spoken languages
+ .setEnhancedModeOptions(enhancedMode)
+ .setDiarizationOptions(diarizationOptions)
+ .setProfanityFilterMode(ProfanityFilterMode.MASKED);
+
+ // Transcribe with full LLM-enhanced mode and diarization
+ return client.transcribe(options);
+ }
+ // END: com.azure.ai.speech.transcription.enhancedmode.complete
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/README.md b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/README.md
new file mode 100644
index 000000000000..65eb1d9979d5
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/README.md
@@ -0,0 +1,260 @@
+# Azure AI Speech Transcription Samples
+
+This directory contains runnable code samples that demonstrate how to use the Azure AI Speech Transcription client library for Java.
+
+## Prerequisites
+
+To run these samples, you need:
+
+1. **Azure Subscription**: An active Azure subscription
+2. **Azure AI Speech Service Resource**: Create one in the [Azure Portal](https://portal.azure.com)
+3. **Authentication**: Choose one of the following authentication methods:
+
+### Option 1: Entra ID Authentication (Recommended for Production)
+
+ Set the endpoint and configure Entra ID credentials:
+
+ ```bash
+ export SPEECH_ENDPOINT=https://your-resource-name.cognitiveservices.azure.com/
+ ```
+
+ **And** configure one of the following credential sources:
+ - **Managed Identity**: For apps running in Azure (App Service, Azure Functions, VMs, etc.)
+ - **Azure CLI**: Run `az login` on your development machine
+ - **Environment Variables**: Set `AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, and `AZURE_CLIENT_SECRET`
+ - **Visual Studio Code or IntelliJ**: Sign in through your IDE
+
+ **Note**: You'll also need to assign the "Cognitive Services User" role to your identity:
+
+ ```bash
+ az role assignment create --assignee <your-identity-object-id> \
+ --role "Cognitive Services User" \
+ --scope /subscriptions/<subscription-id>/resourceGroups/<resource-group>/providers/Microsoft.CognitiveServices/accounts/<resource-name>
+ ```
+
+ **Required dependency** for Entra ID authentication:
+
+ ```xml
+ <dependency>
+     <groupId>com.azure</groupId>
+     <artifactId>azure-identity</artifactId>
+     <version>1.13.0</version>
+ </dependency>
+ ```
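+
+ With the dependency in place, you can build the client with `DefaultAzureCredential`. A minimal sketch (the `TokenCredential` overload of `credential(...)` is assumed here from the Entra ID support described above):
+
+ ```java
+ import com.azure.identity.DefaultAzureCredentialBuilder;
+
+ TranscriptionClient client = new TranscriptionClientBuilder()
+     .endpoint(System.getenv("SPEECH_ENDPOINT"))
+     .credential(new DefaultAzureCredentialBuilder().build())
+     .buildClient();
+ ```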
+
+### Option 2: API Key Authentication (Easier for Getting Started)
+
+ Set these environment variables:
+
+ ```bash
+ export SPEECH_ENDPOINT=https://your-resource-name.cognitiveservices.azure.com/
+ export SPEECH_API_KEY=your-api-key
+ ```
+
+ You can find your API key in the Azure Portal on your Speech resource's **Keys and Endpoint** page.
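+
+ The matching client construction, as used throughout these samples:
+
+ ```java
+ TranscriptionClient client = new TranscriptionClientBuilder()
+     .endpoint(System.getenv("SPEECH_ENDPOINT"))
+     .credential(new KeyCredential(System.getenv("SPEECH_API_KEY")))
+     .buildClient();
+ ```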
+
+4. **Audio File**: Most samples read the bundled audio file at `src/samples/assets/sample-audio.wav`; run them from the module root so the relative path resolves
+
+## Authentication Methods
+
+All samples in this directory support **both authentication methods**:
+
+- **Entra ID (TokenCredential)**: Uses `DefaultAzureCredential` from azure-identity
+- **API Key (KeyCredential)**: Uses the `SPEECH_API_KEY` environment variable
+
+The samples will automatically detect which authentication method to use based on the environment variables you've set. If `SPEECH_API_KEY` is set, it will use API Key authentication; otherwise, it will attempt Entra ID authentication.
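+
+A minimal sketch of that detection logic (the `KeyCredential` path mirrors the samples; the `DefaultAzureCredential` fallback assumes azure-identity is on the classpath):
+
+```java
+String endpoint = System.getenv("SPEECH_ENDPOINT");
+String apiKey = System.getenv("SPEECH_API_KEY");
+
+TranscriptionClientBuilder builder = new TranscriptionClientBuilder().endpoint(endpoint);
+
+// Prefer the API key when present; otherwise fall back to Entra ID.
+TranscriptionClient client = (apiKey != null)
+    ? builder.credential(new KeyCredential(apiKey)).buildClient()
+    : builder.credential(new DefaultAzureCredentialBuilder().build()).buildClient();
+```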
+
+## Available Samples
+
+### TranscribeAudioFileSample.java
+
+**Champion scenario**: Basic audio transcription from a file
+
+Demonstrates the most common use case - transcribing a single audio file with minimal configuration.
+
+**Key features**:
+
+- Creating a TranscriptionClient
+- Reading an audio file
+- Transcribing with default options
+- Processing results
+
+**Run**:
+
+```bash
+cd sdk/transcription/azure-ai-speech-transcription
+mvn exec:java -Dexec.mainClass="com.azure.ai.speech.transcription.TranscribeAudioFileSample"
+```
+
+---
+
+### TranscribeFromUrlSample.java
+
+**Champion scenario**: Transcribe audio from a URL
+
+Demonstrates how to transcribe audio directly from a URL without downloading the file locally.
+
+**Key features**:
+
+- Creating TranscriptionOptions with a URL
+- Transcribing remote audio files
+
+**Run**:
+
+```bash
+mvn exec:java -Dexec.mainClass="com.azure.ai.speech.transcription.TranscribeFromUrlSample"
+```
+
+---
+
+### TranscribeMultiLanguageSample.java
+
+**Champion scenario**: Multi-language transcription
+
+Demonstrates how to transcribe audio containing multiple languages with automatic language detection.
+
+**Key features**:
+
+- Automatic language detection
+- Handling multi-language results
+
+**Run**:
+
+```bash
+mvn exec:java -Dexec.mainClass="com.azure.ai.speech.transcription.TranscribeMultiLanguageSample"
+```
+
+---
+
+### EnhancedModeSample.java
+
+**Champion scenario**: Enhanced transcription quality
+
+Demonstrates how to use enhanced mode with custom prompts and other advanced features.
+
+**Key features**:
+
+- Using EnhancedModeOptions
+- Providing custom prompts for better accuracy
+- Specifying task types
+
+**Run**:
+
+```bash
+mvn exec:java -Dexec.mainClass="com.azure.ai.speech.transcription.EnhancedModeSample"
+```
+
+---
+
+### TranscribeWithDiarizationSample.java
+
+**Champion scenario**: Speaker diarization
+
+Demonstrates how to identify different speakers in the audio.
+
+**Key features**:
+
+- Enabling speaker diarization
+- Configuring max speakers
+- Processing speaker-separated results
+
+**Run**:
+
+```bash
+mvn exec:java -Dexec.mainClass="com.azure.ai.speech.transcription.TranscribeWithDiarizationSample"
+```
+
+---
+
+### TranscribeWithPhraseListSample.java
+
+**Champion scenario**: Improving accuracy with phrase lists
+
+Demonstrates how to use a phrase list to improve recognition of specific terms.
+
+**Key features**:
+
+- Creating a PhraseListOptions
+- Adding custom phrases and boosting their probability
+- Improving accuracy for domain-specific terminology
+
+**Run**:
+
+```bash
+mvn exec:java -Dexec.mainClass="com.azure.ai.speech.transcription.TranscribeWithPhraseListSample"
+```
+
+---
+
+### TranscribeWithProfanityFilterSample.java
+
+**Champion scenario**: Profanity filtering
+
+Demonstrates how to configure profanity filtering options.
+
+**Key features**:
+
+- Setting ProfanityFilterMode (Masked, Removed, None)
+- Handling filtered results
+
+**Run**:
+
+```bash
+mvn exec:java -Dexec.mainClass="com.azure.ai.speech.transcription.TranscribeWithProfanityFilterSample"
+```
+
+---
+
+### ReadmeSamples.java
+
+Code snippets used in the main README.md and API documentation (JavaDoc).
+
+**Note**: This file is used by the `codesnippet-maven-plugin` to inject code into documentation. It's not meant to be run directly.
+
+## Supported Audio Formats
+
+The service supports various audio formats:
+
+- **WAV** (recommended: 16 kHz, 16-bit, mono PCM)
+- **MP3**
+- **OGG**
+- **FLAC**
+- And more
+
+**Constraints**:
+
+- Maximum file size: 250 MB
+- Maximum duration: 2 hours
+
+## Getting Help
+
+- [Azure AI Speech Documentation](https://learn.microsoft.com/azure/ai-services/speech-service/)
+- [SDK README](https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/transcription/azure-ai-speech-transcription)
+- [GitHub Issues](https://github.com/Azure/azure-sdk-for-java/issues)
+
+## Additional Resources
+
+- [Azure SDK for Java Guidelines](https://azure.github.io/azure-sdk/java_introduction.html)
+- [Project Reactor Documentation](https://projectreactor.io/docs)
+- [Azure SDK Blog](https://devblogs.microsoft.com/azure-sdk/)
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/ReadmeSamples.java b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/ReadmeSamples.java
new file mode 100644
index 000000000000..6dfd93d716c4
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/ReadmeSamples.java
@@ -0,0 +1,423 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.speech.transcription;
+
+import com.azure.ai.speech.transcription.models.AudioFileDetails;
+import com.azure.ai.speech.transcription.models.EnhancedModeOptions;
+import com.azure.ai.speech.transcription.models.PhraseListOptions;
+import com.azure.ai.speech.transcription.models.ProfanityFilterMode;
+import com.azure.ai.speech.transcription.models.TranscriptionDiarizationOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import com.azure.core.http.policy.ExponentialBackoffOptions;
+import com.azure.core.http.policy.HttpLogDetailLevel;
+import com.azure.core.http.policy.HttpLogOptions;
+import com.azure.core.http.policy.RetryOptions;
+import com.azure.core.util.BinaryData;
+
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+public final class ReadmeSamples {
+ /**
+ * Sample for basic audio transcription.
+ */
+ public void readmeSamples() {
+ // BEGIN: com.azure.ai.speech.transcription.readme
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ try {
+ // Read audio file
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+
+ // Create audio file details
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Create transcription options
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+
+ // Transcribe audio
+ TranscriptionResult result = client.transcribe(options);
+
+ // Process results
+ System.out.println("Duration: " + result.getDuration() + " ms");
+ result.getCombinedPhrases().forEach(phrase -> {
+ System.out.println("Channel " + phrase.getChannel() + ": " + phrase.getText());
+ });
+ } catch (Exception e) {
+ System.err.println("Error during transcription: " + e.getMessage());
+ }
+ // END: com.azure.ai.speech.transcription.readme
+ }
+
+ /**
+ * Sample for creating a synchronous TranscriptionClient.
+ */
+ public void createSyncClient() {
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionclient.instantiation
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+ // END: com.azure.ai.speech.transcription.transcriptionclient.instantiation
+ }
+
+ /**
+ * Sample for creating an asynchronous TranscriptionAsyncClient.
+ */
+ public void createAsyncClient() {
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionasyncclient.instantiation
+ TranscriptionAsyncClient asyncClient = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildAsyncClient();
+ // END: com.azure.ai.speech.transcription.transcriptionasyncclient.instantiation
+ }
+
+ /**
+ * Sample for transcribing audio with the synchronous client.
+ */
+ public void transcribeAudioSync() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionclient.transcribe
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+
+ TranscriptionResult result = client.transcribe(options);
+
+ System.out.println("Duration: " + result.getDuration() + " ms");
+ result.getCombinedPhrases().forEach(phrase -> {
+ System.out.println("Transcription: " + phrase.getText());
+ });
+ // END: com.azure.ai.speech.transcription.transcriptionclient.transcribe
+ }
+
+ /**
+ * Sample for transcribing audio with the asynchronous client.
+ */
+ public void transcribeAudioAsync() throws Exception {
+ TranscriptionAsyncClient asyncClient = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildAsyncClient();
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionasyncclient.transcribe
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+
+ asyncClient.transcribe(options)
+ .subscribe(result -> {
+ System.out.println("Duration: " + result.getDuration() + " ms");
+ result.getCombinedPhrases().forEach(phrase -> {
+ System.out.println("Transcription: " + phrase.getText());
+ });
+ });
+ // END: com.azure.ai.speech.transcription.transcriptionasyncclient.transcribe
+ }
+
+ /**
+ * Sample for configuring advanced transcription options.
+ */
+ public void advancedTranscriptionOptions() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionoptions.advanced
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Configure advanced options
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setLocales(java.util.Arrays.asList("en-US", "es-ES")) // Specify candidate locales
+ .setProfanityFilterMode(ProfanityFilterMode.MASKED) // Mask profanity
+ .setDiarizationOptions(new TranscriptionDiarizationOptions() // Enable speaker diarization
+ .setMaxSpeakers(5));
+
+ TranscriptionResult result = client.transcribe(options);
+
+ // Access detailed results
+ result.getPhrases().forEach(phrase -> {
+ System.out.println("Speaker " + phrase.getSpeaker() + ": " + phrase.getText());
+ System.out.println("Confidence: " + phrase.getConfidence());
+ System.out.println("Offset: " + phrase.getOffset() + " ms");
+ });
+ // END: com.azure.ai.speech.transcription.transcriptionoptions.advanced
+ }
+
+ /**
+ * Sample for building client with custom configuration.
+ */
+ public void clientWithCustomConfiguration() {
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionclientbuilder.configuration
+ HttpLogOptions logOptions = new HttpLogOptions()
+ .setLogLevel(HttpLogDetailLevel.BODY_AND_HEADERS);
+
+ RetryOptions retryOptions = new RetryOptions(new ExponentialBackoffOptions()
+ .setMaxRetries(5)
+ .setBaseDelay(java.time.Duration.ofSeconds(1))
+ .setMaxDelay(java.time.Duration.ofSeconds(60)));
+
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .httpLogOptions(logOptions)
+ .retryOptions(retryOptions)
+ .buildClient();
+ // END: com.azure.ai.speech.transcription.transcriptionclientbuilder.configuration
+ }
+
+ /**
+ * Sample for processing detailed transcription results.
+ */
+ public void processDetailedResults() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionresult.detailed
+ TranscriptionResult result = client.transcribe(options);
+
+ // Get overall duration
+ System.out.println("Total duration: " + result.getDuration() + " ms");
+
+ // Process each phrase with detailed information
+ result.getPhrases().forEach(phrase -> {
+ System.out.println("\nPhrase: " + phrase.getText());
+ System.out.println(" Channel: " + phrase.getChannel());
+ System.out.println(" Speaker: " + phrase.getSpeaker());
+ System.out.println(" Locale: " + phrase.getLocale());
+ System.out.println(" Confidence: " + phrase.getConfidence());
+ System.out.println(" Timing: " + phrase.getOffset() + " ms - "
+ + (phrase.getOffset() + phrase.getDuration().toMillis()) + " ms");
+
+ // Process individual words with timestamps
+ if (phrase.getWords() != null) {
+ phrase.getWords().forEach(word -> {
+ System.out.println(" Word: " + word.getText() + " @ "
+ + word.getOffset() + " ms");
+ });
+ }
+ });
+ // END: com.azure.ai.speech.transcription.transcriptionresult.detailed
+ }
+
+ /**
+ * Sample for using enhanced mode to improve transcription quality.
+ */
+ public void enhancedModeBasic() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ // BEGIN: readme-sample-enhancedModeBasic
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Enhanced mode is automatically enabled when you create EnhancedModeOptions
+ EnhancedModeOptions enhancedMode = new EnhancedModeOptions()
+ .setPrompts(java.util.Arrays.asList(
+ "Output must be in lexical format."
+ ));
+
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setLocales(java.util.Arrays.asList("en-US", "es-ES"))
+ .setEnhancedModeOptions(enhancedMode);
+
+ TranscriptionResult result = client.transcribe(options);
+ // END: readme-sample-enhancedModeBasic
+ }
+
+ /**
+ * Sample for using enhanced mode with custom prompts.
+ */
+ public void enhancedModeWithPrompts() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ // BEGIN: readme-sample-enhancedModeWithPrompts
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Enhanced mode is automatically enabled
+ // Use prompts to guide transcription with domain-specific terminology
+ // Always include lexical format prompt for best results
+ EnhancedModeOptions enhancedMode = new EnhancedModeOptions()
+ .setPrompts(java.util.Arrays.asList(
+ "Output must be in lexical format.",
+ "Medical consultation discussing hypertension and diabetes",
+ "Common medications: metformin, lisinopril, atorvastatin",
+ "Patient symptoms and treatment plan"
+ ));
+
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setLocales(java.util.Arrays.asList("en-US", "es-ES"))
+ .setEnhancedModeOptions(enhancedMode);
+
+ TranscriptionResult result = client.transcribe(options);
+ // END: readme-sample-enhancedModeWithPrompts
+ }
+
+ /**
+ * Sample for using enhanced mode with translation.
+ */
+ public void enhancedModeWithTranslation() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ // BEGIN: readme-sample-enhancedModeWithTranslation
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Enhanced mode is automatically enabled
+ // Configure enhanced mode to transcribe Spanish audio and translate to English
+ EnhancedModeOptions enhancedMode = new EnhancedModeOptions()
+ .setTargetLanguage("en-US"); // Translate to English
+
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setLocales(java.util.Arrays.asList("es-ES")) // Source language: Spanish
+ .setEnhancedModeOptions(enhancedMode);
+
+ TranscriptionResult result = client.transcribe(options);
+ // END: readme-sample-enhancedModeWithTranslation
+ }
+
+ /**
+ * Sample for transcribing audio using audio URL constructor.
+ */
+ public void transcribeWithAudioUrl() {
+ // BEGIN: readme-sample-transcribeWithAudioUrl
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ // Create transcription options with audio URL
+ TranscriptionOptions options = new TranscriptionOptions("https://example.com/audio.wav");
+
+ // Transcribe audio
+ TranscriptionResult result = client.transcribe(options);
+
+ // Process results
+ result.getCombinedPhrases().forEach(phrase -> {
+ System.out.println(phrase.getText());
+ });
+ // END: readme-sample-transcribeWithAudioUrl
+ }
+
+ /**
+ * Sample for multi-language transcription.
+ */
+ public void transcribeMultiLanguage() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionoptions.multilanguage
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Configure transcription WITHOUT specifying locales
+ // This allows the service to auto-detect and transcribe multiple languages
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+
+ TranscriptionResult result = client.transcribe(options);
+
+ result.getPhrases().forEach(phrase -> {
+ System.out.println("Language: " + phrase.getLocale());
+ System.out.println("Text: " + phrase.getText());
+ });
+ // END: com.azure.ai.speech.transcription.transcriptionoptions.multilanguage
+ }
+
+ /**
+ * Sample for enhanced mode transcription.
+ */
+ public void transcribeEnhancedMode() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionoptions.enhancedmode
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Enhanced mode is automatically enabled
+ EnhancedModeOptions enhancedMode = new EnhancedModeOptions()
+ .setTask("transcribe")
+ .setPrompts(java.util.Arrays.asList("Output must be in lexical format."));
+
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setEnhancedModeOptions(enhancedMode);
+
+ TranscriptionResult result = client.transcribe(options);
+
+ System.out.println("Transcription: " + result.getCombinedPhrases().get(0).getText());
+ // END: com.azure.ai.speech.transcription.transcriptionoptions.enhancedmode
+ }
+
+ /**
+ * Sample for transcription with phrase list.
+ */
+ public void transcribeWithPhraseList() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint("https://.cognitiveservices.azure.com/")
+ .credential(new KeyCredential(""))
+ .buildClient();
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionoptions.phraselist
+ byte[] audioData = Files.readAllBytes(Paths.get("path/to/audio.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ PhraseListOptions phraseListOptions = new PhraseListOptions()
+ .setPhrases(java.util.Arrays.asList("Azure", "Cognitive Services"))
+ .setBiasingWeight(5.0);
+
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setPhraseListOptions(phraseListOptions);
+
+ TranscriptionResult result = client.transcribe(options);
+
+ result.getCombinedPhrases().forEach(phrase -> {
+ System.out.println(phrase.getText());
+ });
+ // END: com.azure.ai.speech.transcription.transcriptionoptions.phraselist
+ }
+}
+
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeAudioFileSample.java b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeAudioFileSample.java
new file mode 100644
index 000000000000..382d5c473bef
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeAudioFileSample.java
@@ -0,0 +1,59 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.speech.transcription;
+
+import com.azure.ai.speech.transcription.models.AudioFileDetails;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import com.azure.core.util.BinaryData;
+
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+/**
+ * Simplest possible example of transcribing an audio file.
+ *
+ * This sample demonstrates the absolute minimum code needed to:
+ * 1. Create a client
+ * 2. Load an audio file
+ * 3. Transcribe it
+ * 4. Get the text result
+ */
+public class TranscribeAudioFileSample {
+ public static void main(String[] args) {
+ try {
+ // Get credentials from environment variables
+ String endpoint = System.getenv("SPEECH_ENDPOINT");
+ String apiKey = System.getenv("SPEECH_API_KEY");
+
+ // Create client
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(apiKey))
+ .buildClient();
+
+ // Load audio file
+ String audioFilePath = "src/samples/assets/sample-audio.wav";
+ byte[] audioData = Files.readAllBytes(Paths.get(audioFilePath));
+
+ // Create audio file details
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Transcribe
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+ TranscriptionResult result = client.transcribe(options);
+
+ // Print result
+ System.out.println("Transcription:");
+ result.getCombinedPhrases().forEach(phrase ->
+ System.out.println(phrase.getText())
+ );
+
+ } catch (Exception e) {
+ System.err.println("Error: " + e.getMessage());
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeFromUrlSample.java b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeFromUrlSample.java
new file mode 100644
index 000000000000..b1f77c0beba9
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeFromUrlSample.java
@@ -0,0 +1,64 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.speech.transcription;
+
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import java.util.Arrays;
+
+/**
+ * Sample demonstrates how to transcribe audio from a URL.
+ */
+public class TranscribeFromUrlSample {
+
+ /**
+ * Main method to invoke this demo.
+ *
+ * @param args Unused arguments to the program.
+ */
+ public static void main(String[] args) {
+ String endpoint = System.getenv("SPEECH_ENDPOINT");
+ String apiKey = System.getenv("SPEECH_API_KEY");
+
+ if (endpoint == null || apiKey == null) {
+ System.err.println("Please set SPEECH_ENDPOINT and SPEECH_API_KEY environment variables.");
+ System.exit(1);
+ }
+
+ // Create the transcription client
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(apiKey))
+ .buildClient();
+
+ System.out.println("Azure AI Speech Transcription - Transcribe from URL Sample");
+ System.out.println("============================================================\n");
+
+ // Audio file URL (must be publicly accessible)
+ // Using sample audio from Azure documentation
+ String audioUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-speech-sdk/master/sampledata/audiofiles/aboutSpeechSdk.wav";
+
+ System.out.println("Transcribing audio from URL: " + audioUrl);
+ System.out.println();
+
+ // Create transcription options with audio URL
+ TranscriptionOptions options = new TranscriptionOptions(audioUrl)
+ .setLocales(Arrays.asList("en-US"));
+
+ // Transcribe the audio from URL
+ TranscriptionResult result = client.transcribe(options);
+
+ // Display results
+ System.out.println("Transcription Results:");
+ System.out.println("---------------------");
+ System.out.println("Duration: " + result.getDuration() + "\n");
+
+ if (result.getCombinedPhrases() != null && !result.getCombinedPhrases().isEmpty()) {
+ System.out.println("Combined text: " + result.getCombinedPhrases().get(0).getText());
+ }
+
+ System.out.println();
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeMultiLanguageSample.java b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeMultiLanguageSample.java
new file mode 100644
index 000000000000..fedc4499806e
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeMultiLanguageSample.java
@@ -0,0 +1,108 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.speech.transcription;
+
+import com.azure.ai.speech.transcription.models.AudioFileDetails;
+import com.azure.ai.speech.transcription.models.TranscribedPhrase;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import com.azure.core.util.BinaryData;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+/**
+ * Sample demonstrates how to transcribe audio containing multiple languages.
+ *
+ * When locales are NOT specified, the service automatically detects and transcribes
+ * multiple languages within the same audio file, switching between them as needed.
+ * This is useful for:
+ * - Multilingual conversations
+ * - Code-switched speech (e.g., Spanish-English)
+ * - International meetings or interviews
+ */
+public class TranscribeMultiLanguageSample {
+
+ /**
+ * Main method to invoke this demo.
+ *
+ * @param args Unused arguments to the program.
+ */
+ public static void main(String[] args) {
+ String endpoint = System.getenv("SPEECH_ENDPOINT");
+ String apiKey = System.getenv("SPEECH_API_KEY");
+
+ if (endpoint == null || apiKey == null) {
+ System.err.println("Please set SPEECH_ENDPOINT and SPEECH_API_KEY environment variables.");
+ System.exit(1);
+ }
+
+ System.out.println("Azure AI Speech Transcription - Multi-Language Sample");
+ System.out.println("=====================================================\n");
+
+ // Create the transcription client
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(apiKey))
+ .buildClient();
+
+ try {
+ // Load audio file
+ String audioFilePath = "src/samples/assets/sample-audio.wav";
+ byte[] audioData = Files.readAllBytes(Paths.get(audioFilePath));
+ AudioFileDetails fileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Configure transcription WITHOUT specifying locales
+ // This allows the service to auto-detect and transcribe multiple languages
+ // within the same audio file, switching between them as needed
+ TranscriptionOptions options = new TranscriptionOptions(fileDetails);
+
+ System.out.println("Transcribing with automatic multi-language detection...");
+ System.out.println("(No locale specified - service will detect all languages)\n");
+
+ // Transcribe the audio
+ TranscriptionResult result = client.transcribe(options);
+
+ // Display results
+ System.out.println("Transcription Results:");
+ System.out.println("---------------------");
+ System.out.println("Duration: " + result.getDuration());
+ System.out.println("Total phrases found: " + (result.getPhrases() != null ? result.getPhrases().size() : 0));
+ System.out.println("Total combined phrases: " + (result.getCombinedPhrases() != null ? result.getCombinedPhrases().size() : 0));
+ System.out.println();
+
+ // Show detailed phrases with timestamps
+ if (result.getPhrases() != null && !result.getPhrases().isEmpty()) {
+ System.out.println("Detailed Phrases:");
+ System.out.println("-----------------");
+
+ for (int i = 0; i < result.getPhrases().size(); i++) {
+ TranscribedPhrase phrase = result.getPhrases().get(i);
+ long offsetMs = phrase.getOffset();
+ long durationMs = phrase.getDuration().toMillis();
+
+ System.out.println("\n[Phrase " + (i + 1) + "] " + offsetMs + "ms - " + (offsetMs + durationMs) + "ms");
+ System.out.println("Locale: " + phrase.getLocale());
+ System.out.println("Text: " + phrase.getText());
+ }
+ }
+
+ // Also show combined phrases per channel
+ if (result.getCombinedPhrases() != null && !result.getCombinedPhrases().isEmpty()) {
+ System.out.println("\n\nCombined Transcription (All Languages):");
+ System.out.println("========================================");
+ result.getCombinedPhrases().forEach(phrase -> {
+ System.out.println(phrase.getText());
+ });
+ }
+
+ System.out.println("\nNote: When no locales are specified, the service transcribes all languages");
+ System.out.println("present in the audio. However, the locale field in each phrase may not always");
+ System.out.println("accurately reflect the actual language of that specific phrase.");
+ } catch (Exception e) {
+ System.err.println("Error: " + e.getMessage());
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeWithDiarizationSample.java b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeWithDiarizationSample.java
new file mode 100644
index 000000000000..bf57f4308f36
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeWithDiarizationSample.java
@@ -0,0 +1,86 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.speech.transcription;
+
+import com.azure.ai.speech.transcription.models.AudioFileDetails;
+import com.azure.ai.speech.transcription.models.TranscribedPhrase;
+import com.azure.ai.speech.transcription.models.TranscriptionDiarizationOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import com.azure.core.util.BinaryData;
+
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+/**
+ * Sample demonstrates using speaker diarization to identify different speakers in audio.
+ *
+ * Speaker diarization detects and separates different speakers in the audio, labeling
+ * each transcribed segment with a speaker ID. This is useful for:
+ * - Meeting transcriptions
+ * - Interview recordings
+ * - Multi-person conversations
+ * - Podcast transcriptions
+ */
+public class TranscribeWithDiarizationSample {
+ public static void main(String[] args) {
+ String endpoint = System.getenv("SPEECH_ENDPOINT");
+ String apiKey = System.getenv("SPEECH_API_KEY");
+
+ if (endpoint == null || apiKey == null) {
+ System.err.println("Please set SPEECH_ENDPOINT and SPEECH_API_KEY environment variables");
+ return;
+ }
+
+ System.out.println("Azure AI Speech Transcription - Speaker Diarization Sample");
+ System.out.println("===========================================================\n");
+
+ // Create client
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(apiKey))
+ .buildClient();
+
+ try {
+ // Load audio file
+ String audioFilePath = "src/samples/assets/sample-audio.wav";
+ byte[] audioData = Files.readAllBytes(Paths.get(audioFilePath));
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Configure speaker diarization
+ TranscriptionDiarizationOptions diarizationOptions = new TranscriptionDiarizationOptions()
+ .setMaxSpeakers(5); // Maximum number of speakers to detect (2-36)
+
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setDiarizationOptions(diarizationOptions);
+
+ System.out.println("Transcribing with speaker diarization (max 5 speakers)...\n");
+
+ // Transcribe with diarization
+ TranscriptionResult result = client.transcribe(options);
+
+ // Display results organized by speaker
+ System.out.println("Transcription with Speaker Information:");
+ System.out.println("----------------------------------------");
+
+ if (result.getPhrases() != null && !result.getPhrases().isEmpty()) {
+ for (TranscribedPhrase phrase : result.getPhrases()) {
+ int speakerId = phrase.getSpeaker() != null ? phrase.getSpeaker() : 0;
+ double startTime = phrase.getOffset() / 1000.0;
+ double endTime = (phrase.getOffset() + phrase.getDuration().toMillis()) / 1000.0;
+
+ System.out.println(String.format("\n[Speaker %d] (%.2fs - %.2fs)",
+ speakerId, startTime, endTime));
+ System.out.println(phrase.getText());
+ }
+ }
+
+ } catch (Exception e) {
+ System.err.println("Error: " + e.getMessage());
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeWithPhraseListSample.java b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeWithPhraseListSample.java
new file mode 100644
index 000000000000..af07db34935e
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeWithPhraseListSample.java
@@ -0,0 +1,97 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.speech.transcription;
+
+import com.azure.ai.speech.transcription.models.AudioFileDetails;
+import com.azure.ai.speech.transcription.models.PhraseListOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import com.azure.core.util.BinaryData;
+
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+
+/**
+ * Sample demonstrates using a phrase list to improve recognition accuracy for specific terms.
+ *
+ * Phrase lists help the speech service better recognize domain-specific terminology,
+ * proper nouns, and uncommon words that might otherwise be misrecognized.
+ */
+public class TranscribeWithPhraseListSample {
+ public static void main(String[] args) {
+ String endpoint = System.getenv("SPEECH_ENDPOINT");
+ String apiKey = System.getenv("SPEECH_API_KEY");
+
+ if (endpoint == null || apiKey == null) {
+ System.err.println("Please set SPEECH_ENDPOINT and SPEECH_API_KEY environment variables");
+ return;
+ }
+
+ System.out.println("Azure AI Speech Transcription - Phrase List Sample");
+ System.out.println("====================================================\n");
+
+ // Create client
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(apiKey))
+ .buildClient();
+
+ try {
+ // Load audio file
+ String audioFilePath = "src/samples/assets/sample-audio.wav";
+ byte[] audioData = Files.readAllBytes(Paths.get(audioFilePath));
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Create phrase list with custom terms
+ // Add phrases that appear in your audio for better recognition
+ PhraseListOptions phraseListOptions = new PhraseListOptions()
+ .setPhrases(Arrays.asList(
+ "Mary",
+ "El Mundo",
+ "Secret Garden",
+ "empleada doméstica",
+ "habitación"
+ ))
+ .setBiasingWeight(5.0); // Weight range: 1.0-20.0 (higher = stronger bias)
+
+ // Create transcription options with phrase list
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setPhraseListOptions(phraseListOptions);
+
+ System.out.println("Custom phrase list:");
+ phraseListOptions.getPhrases().forEach(phrase ->
+ System.out.println(" - " + phrase)
+ );
+ System.out.println("\nBiasing weight: " + phraseListOptions.getBiasingWeight());
+ System.out.println("\nTranscribing with phrase list...\n");
+
+ // Transcribe
+ TranscriptionResult result = client.transcribe(options);
+
+ System.out.println("Transcription result:");
+ System.out.println("---------------------");
+ result.getCombinedPhrases().forEach(phrase ->
+ System.out.println(phrase.getText())
+ );
+
+ // Print individual phrases with timing information
+ if (result.getPhrases() != null && !result.getPhrases().isEmpty()) {
+ System.out.println("\nDetailed phrases:");
+ result.getPhrases().forEach(phrase ->
+ System.out.println(String.format(" [%dms]: %s",
+ phrase.getOffset(),
+ phrase.getText()))
+ );
+ }
+
+ System.out.println("\n Transcription completed successfully!");
+
+ } catch (Exception e) {
+ System.err.println("Error: " + e.getMessage());
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeWithProfanityFilterSample.java b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeWithProfanityFilterSample.java
new file mode 100644
index 000000000000..40a67d1b0188
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/TranscribeWithProfanityFilterSample.java
@@ -0,0 +1,80 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.speech.transcription;
+
+import com.azure.ai.speech.transcription.models.AudioFileDetails;
+import com.azure.ai.speech.transcription.models.ProfanityFilterMode;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import com.azure.core.util.BinaryData;
+
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+/**
+ * Sample demonstrates profanity filtering in Azure AI Speech Transcription.
+ * Shows the difference between NONE (raw), MASKED (f***), REMOVED (omitted), and TAGS (XML tagged).
+ */
+public class TranscribeWithProfanityFilterSample {
+ /**
+ * Main method to run the profanity filter sample.
+ *
+ * @param args command line arguments (not used)
+ */
+ public static void main(String[] args) {
+ System.out.println("Azure AI Speech Transcription - Profanity Filter Sample");
+ System.out.println("==========================================================\n");
+
+ String endpoint = System.getenv("SPEECH_ENDPOINT");
+ String apiKey = System.getenv("SPEECH_API_KEY");
+
+ if (endpoint == null || apiKey == null) {
+ System.err.println("Please set SPEECH_ENDPOINT and SPEECH_API_KEY environment variables");
+ return;
+ }
+
+ try {
+ // Create the transcription client
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(apiKey))
+ .buildClient();
+
+ // Load audio file
+ String audioFilePath = "src/samples/assets/sample-profanity.wav";
+ byte[] audioData = Files.readAllBytes(Paths.get(audioFilePath));
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Demonstrate different profanity filter modes
+ ProfanityFilterMode[] modes = {
+ ProfanityFilterMode.NONE,
+ ProfanityFilterMode.MASKED,
+ ProfanityFilterMode.REMOVED,
+ ProfanityFilterMode.TAGS
+ };
+
+ for (ProfanityFilterMode mode : modes) {
+ System.out.println("Transcribing with profanity filter mode: " + mode);
+ System.out.println("----------------------------------------------");
+
+ // Create transcription options with profanity filter
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setProfanityFilterMode(mode);
+
+ // Perform transcription
+ TranscriptionResult result = client.transcribe(options);
+
+ // Display results
+ System.out.println("Combined text: " + result.getCombinedPhrases().get(0).getText());
+ System.out.println();
+ }
+
+ } catch (Exception e) {
+ System.err.println("Error during transcription: " + e.getMessage());
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/javadoccodesnippets/TranscriptionAsyncClientJavaDocCodeSnippets.java b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/javadoccodesnippets/TranscriptionAsyncClientJavaDocCodeSnippets.java
new file mode 100644
index 000000000000..5dc618dee834
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/javadoccodesnippets/TranscriptionAsyncClientJavaDocCodeSnippets.java
@@ -0,0 +1,215 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Source code snippets from this file are embedded in Transcription SDK JavaDoc (API documentation).
+
+package com.azure.ai.speech.transcription.javadoccodesnippets;
+
+import com.azure.ai.speech.transcription.TranscriptionAsyncClient;
+import com.azure.ai.speech.transcription.TranscriptionClientBuilder;
+import com.azure.ai.speech.transcription.models.AudioFileDetails;
+import com.azure.ai.speech.transcription.models.ProfanityFilterMode;
+import com.azure.ai.speech.transcription.models.TranscriptionDiarizationOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import com.azure.core.util.BinaryData;
+import reactor.core.publisher.Mono;
+
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Code snippets for {@link TranscriptionAsyncClient} JavaDoc documentation.
+ */
+public class TranscriptionAsyncClientJavaDocCodeSnippets {
+
+ private static String endpoint = System.getenv("SPEECH_ENDPOINT");
+ private static String key = System.getenv("SPEECH_API_KEY");
+
+ /**
+ * Sample for creating an asynchronous TranscriptionAsyncClient with API key authentication.
+ */
+ public void createAsyncClientWithApiKey() {
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionasyncclient.instantiation.apikey
+ TranscriptionAsyncClient asyncClient = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(key))
+ .buildAsyncClient();
+ // END: com.azure.ai.speech.transcription.transcriptionasyncclient.instantiation.apikey
+ }
+
+ /**
+ * Sample for transcribing audio asynchronously using subscribe pattern.
+ */
+ public void transcribeAsyncWithSubscribe() throws Exception {
+ TranscriptionAsyncClient asyncClient = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(key))
+ .buildAsyncClient();
+
+ byte[] audioData = Files.readAllBytes(Paths.get("sample.wav"));
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionasyncclient.transcribe.subscribe
+ CountDownLatch latch = new CountDownLatch(1);
+
+ asyncClient.transcribe(options)
+ .subscribe(
+ // onNext: Process result
+ result -> {
+ System.out.println("Duration: " + result.getDuration() + " ms");
+ if (result.getCombinedPhrases() != null) {
+ result.getCombinedPhrases().forEach(phrase ->
+ System.out.println("Text: " + phrase.getText())
+ );
+ }
+ latch.countDown();
+ },
+ // onError: Handle error
+ error -> {
+ System.err.println("Error: " + error.getMessage());
+ latch.countDown();
+ },
+ // onComplete: Completion handler
+ () -> System.out.println("Transcription completed")
+ );
+
+ latch.await(60, TimeUnit.SECONDS);
+ // END: com.azure.ai.speech.transcription.transcriptionasyncclient.transcribe.subscribe
+ }
+
+ /**
+ * Sample for transcribing audio asynchronously using block pattern.
+ */
+ public void transcribeAsyncWithBlock() throws Exception {
+ TranscriptionAsyncClient asyncClient = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(key))
+ .buildAsyncClient();
+
+ byte[] audioData = Files.readAllBytes(Paths.get("sample.wav"));
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionasyncclient.transcribe.block
+ // Use block() to convert async call to sync
+ TranscriptionResult result = asyncClient.transcribe(options).block();
+
+ if (result != null) {
+ System.out.println("Duration: " + result.getDuration() + " ms");
+ }
+ // END: com.azure.ai.speech.transcription.transcriptionasyncclient.transcribe.block
+ }
+
+ /**
+ * Sample for transcribing audio asynchronously with advanced options.
+ */
+ public void transcribeAsyncWithOptions() throws Exception {
+ TranscriptionAsyncClient asyncClient = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(key))
+ .buildAsyncClient();
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionasyncclient.transcribe.options
+ byte[] audioData = Files.readAllBytes(Paths.get("sample.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Configure advanced transcription options
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setLocales(Arrays.asList("en-US", "es-ES"))
+ .setProfanityFilterMode(ProfanityFilterMode.MASKED)
+ .setDiarizationOptions(new TranscriptionDiarizationOptions().setMaxSpeakers(5));
+
+ // Transcribe asynchronously
+ Mono<TranscriptionResult> resultMono = asyncClient.transcribe(options);
+
+ // Process result
+ resultMono.subscribe(result -> {
+ if (result.getPhrases() != null) {
+ result.getPhrases().forEach(phrase -> {
+ System.out.printf("Speaker %d: %s%n",
+ phrase.getSpeaker(), phrase.getText());
+ });
+ }
+ });
+ // END: com.azure.ai.speech.transcription.transcriptionasyncclient.transcribe.options
+ }
+
+ /**
+ * Sample for transcribing audio asynchronously with timeout and error handling.
+ */
+ public void transcribeAsyncWithTimeoutAndErrorHandling() throws Exception {
+ TranscriptionAsyncClient asyncClient = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(key))
+ .buildAsyncClient();
+
+ byte[] audioData = Files.readAllBytes(Paths.get("sample.wav"));
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionasyncclient.transcribe.timeout
+ Mono<TranscriptionResult> resultMono = asyncClient.transcribe(options)
+ .timeout(Duration.ofMinutes(2))
+ .doOnError(error -> System.err.println("Error: " + error.getMessage()))
+ .onErrorResume(error -> {
+ System.err.println("Fallback: Returning empty result");
+ return Mono.empty();
+ });
+
+ TranscriptionResult result = resultMono.block();
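+
+ // onErrorResume falls back to Mono.empty(), so block() may return null here
+ if (result != null) {
+ System.out.println("Duration: " + result.getDuration().toMillis() + " ms");
+ }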
+ // END: com.azure.ai.speech.transcription.transcriptionasyncclient.transcribe.timeout
+ }
+
+ /**
+ * Sample for processing detailed async transcription results.
+ */
+ public void processDetailedAsyncResults() throws Exception {
+ TranscriptionAsyncClient asyncClient = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(key))
+ .buildAsyncClient();
+
+ byte[] audioData = Files.readAllBytes(Paths.get("sample.wav"));
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionasyncclient.results.detailed
+ asyncClient.transcribe(options)
+ .subscribe(result -> {
+ // Access combined phrases
+ if (result.getCombinedPhrases() != null) {
+ result.getCombinedPhrases().forEach(channelPhrase ->
+ System.out.printf("[Channel %d] %s%n",
+ channelPhrase.getChannel(), channelPhrase.getText())
+ );
+ }
+
+ // Access detailed phrases with word-level timing
+ if (result.getPhrases() != null) {
+ result.getPhrases().forEach(phrase -> {
+ System.out.printf("Phrase (%.2f-%.2fs): %s%n",
+ phrase.getOffset() / 1000.0,
+ (phrase.getOffset() + phrase.getDuration().toMillis()) / 1000.0,
+ phrase.getText());
+
+ if (phrase.getWords() != null) {
+ phrase.getWords().forEach(word ->
+ System.out.printf(" \"%s\" at %.2fs%n",
+ word.getText(),
+ word.getOffset() / 1000.0)
+ );
+ }
+ });
+ }
+ });
+ // END: com.azure.ai.speech.transcription.transcriptionasyncclient.results.detailed
+ }
+}
+
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/javadoccodesnippets/TranscriptionClientJavaDocCodeSnippets.java b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/javadoccodesnippets/TranscriptionClientJavaDocCodeSnippets.java
new file mode 100644
index 000000000000..cc9ebd2c57fc
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/samples/java/com/azure/ai/speech/transcription/javadoccodesnippets/TranscriptionClientJavaDocCodeSnippets.java
@@ -0,0 +1,164 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Source code snippets from this file are embedded in Transcription SDK JavaDoc (API documentation).
+
+package com.azure.ai.speech.transcription.javadoccodesnippets;
+
+import com.azure.ai.speech.transcription.TranscriptionClient;
+import com.azure.ai.speech.transcription.TranscriptionClientBuilder;
+import com.azure.ai.speech.transcription.models.AudioFileDetails;
+import com.azure.ai.speech.transcription.models.ProfanityFilterMode;
+import com.azure.ai.speech.transcription.models.TranscriptionDiarizationOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import com.azure.core.credential.TokenCredential;
+import com.azure.core.util.BinaryData;
+import com.azure.identity.DefaultAzureCredentialBuilder;
+
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+
+/**
+ * Code snippets for {@link TranscriptionClient} JavaDoc documentation.
+ */
+public class TranscriptionClientJavaDocCodeSnippets {
+
+ private static String endpoint = System.getenv("SPEECH_ENDPOINT");
+ private static String key = System.getenv("SPEECH_API_KEY");
+
+ /**
+ * Sample for creating a synchronous TranscriptionClient with API key authentication.
+ */
+ public void createClientWithApiKey() {
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionclient.instantiation.apikey
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(key))
+ .buildClient();
+ // END: com.azure.ai.speech.transcription.transcriptionclient.instantiation.apikey
+ }
+
+ /**
+ * Sample for creating a synchronous TranscriptionClient with Entra ID authentication.
+ */
+ public void createClientWithTokenCredential() {
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionclient.instantiation.tokencredential
+ // Use DefaultAzureCredential for Entra ID authentication
+ TokenCredential credential = new DefaultAzureCredentialBuilder().build();
+
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(credential)
+ .buildClient();
+ // END: com.azure.ai.speech.transcription.transcriptionclient.instantiation.tokencredential
+ }
+
+ /**
+ * Sample for transcribing audio from a file with default options.
+ */
+ public void transcribeFromFile() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(key))
+ .buildClient();
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionclient.transcribe.file
+ // Read audio file
+ byte[] audioData = Files.readAllBytes(Paths.get("sample.wav"));
+
+ // Create audio file details
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Create transcription options using the AudioFileDetails constructor
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+
+ // Transcribe audio
+ TranscriptionResult result = client.transcribe(options);
+
+ // Process results
+ System.out.println("Duration: " + result.getDuration() + " ms");
+ result.getCombinedPhrases().forEach(phrase -> {
+ System.out.println("Channel " + phrase.getChannel() + ": " + phrase.getText());
+ });
+ // END: com.azure.ai.speech.transcription.transcriptionclient.transcribe.file
+ }
+
+ /**
+ * Sample for transcribing audio with advanced options.
+ */
+ public void transcribeWithOptions() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(key))
+ .buildClient();
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionclient.transcribe.options
+ byte[] audioData = Files.readAllBytes(Paths.get("sample.wav"));
+
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+
+ // Configure advanced transcription options
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails)
+ .setLocales(Arrays.asList("en-US", "es-ES"))
+ .setProfanityFilterMode(ProfanityFilterMode.MASKED)
+ .setDiarizationOptions(new TranscriptionDiarizationOptions().setMaxSpeakers(5));
+
+ TranscriptionResult result = client.transcribe(options);
+
+ // Access detailed results
+ if (result.getPhrases() != null) {
+ result.getPhrases().forEach(phrase -> {
+ System.out.printf("Speaker %d: %s%n",
+ phrase.getSpeaker(), phrase.getText());
+ });
+ }
+ // END: com.azure.ai.speech.transcription.transcriptionclient.transcribe.options
+ }
+
+ /**
+ * Sample for processing detailed transcription results with word-level timing.
+ */
+ public void processDetailedResults() throws Exception {
+ TranscriptionClient client = new TranscriptionClientBuilder()
+ .endpoint(endpoint)
+ .credential(new KeyCredential(key))
+ .buildClient();
+
+ byte[] audioData = Files.readAllBytes(Paths.get("sample.wav"));
+ AudioFileDetails audioFileDetails = new AudioFileDetails(BinaryData.fromBytes(audioData));
+ TranscriptionOptions options = new TranscriptionOptions(audioFileDetails);
+ TranscriptionResult result = client.transcribe(options);
+
+ // BEGIN: com.azure.ai.speech.transcription.transcriptionclient.results.detailed
+ // Access sentence-level combined phrases
+ if (result.getCombinedPhrases() != null) {
+ result.getCombinedPhrases().forEach(channelPhrase -> {
+ System.out.printf("[Channel %d] %s%n",
+ channelPhrase.getChannel(), channelPhrase.getText());
+ });
+ }
+
+ // Access word-level details with timing
+ if (result.getPhrases() != null) {
+ result.getPhrases().forEach(phrase -> {
+ System.out.printf("Phrase (%.2f-%.2fs): %s%n",
+ phrase.getOffset() / 1000.0,
+ (phrase.getOffset() + phrase.getDuration().toMillis()) / 1000.0,
+ phrase.getText());
+
+ // Get word-level timing information
+ if (phrase.getWords() != null) {
+ phrase.getWords().forEach(word -> {
+ System.out.printf(" Word: \"%s\" at %.2fs%n",
+ word.getText(),
+ word.getOffset() / 1000.0);
+ });
+ }
+ });
+ }
+ // END: com.azure.ai.speech.transcription.transcriptionclient.results.detailed
+ }
+}
+
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/README.md b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/README.md
new file mode 100644
index 000000000000..353f2605ba20
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/README.md
@@ -0,0 +1,214 @@
+# Azure AI Speech Transcription client library tests for Java
+
+This directory contains tests for the Azure AI Speech Transcription client library for Java.
+
+## Test Structure
+
+The tests are organized as follows:
+
+- **TranscriptionClientTestBase.java**: Base class containing common test infrastructure, helper methods, and validation logic. Includes support for both file-based and URL-based transcription.
+- **TranscriptionClientTest.java**: Tests for the synchronous `TranscriptionClient` (14 tests)
+- **TranscriptionAsyncClientTest.java**: Tests for the asynchronous `TranscriptionAsyncClient` (16 tests)
+- **generated/**: Auto-generated test templates (for reference only)
+
+## Prerequisites
+
+Before running the tests, you need:
+
+1. An Azure Cognitive Services Speech resource. Create one using the [Azure Portal](https://portal.azure.com/).
+2. Java Development Kit (JDK) 8 or later
+3. Maven 3.x or later
+4. A sample audio file for testing (WAV, MP3, or OGG format, shorter than 2 hours, smaller than 250 MB)
+
+## Set Environment Variables
+
+Set the following environment variables to run live tests:
+
+### Windows (PowerShell)
+
+```powershell
+$env:SPEECH_ENDPOINT = "https://<your-resource-name>.cognitiveservices.azure.com"
+$env:SPEECH_API_KEY = "<your-api-key>"
+```
+
+### Windows (Command Prompt)
+
+```cmd
+set SPEECH_ENDPOINT=https://<your-resource-name>.cognitiveservices.azure.com
+set SPEECH_API_KEY=<your-api-key>
+```
+
+### Linux/macOS (Bash)
+
+```bash
+export SPEECH_ENDPOINT="https://<your-resource-name>.cognitiveservices.azure.com"
+export SPEECH_API_KEY="<your-api-key>"
+```
+
+## Configure Test Proxy
+
+The Azure SDK for Java uses a test proxy for recording and playing back HTTP interactions. This library has been migrated to use the test proxy following the [Test Proxy Migration Guide](https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/core/azure-core-test/TestProxyMigrationGuide.md).
+
+Test recordings are stored in the [azure-sdk-assets](https://github.com/Azure/azure-sdk-assets) repository and referenced via the `assets.json` file. Configure the test mode by setting the `AZURE_TEST_MODE` environment variable:
+
+### Live Mode (against live service)
+
+```powershell
+$env:AZURE_TEST_MODE = "LIVE"
+```
+
+This mode makes real HTTP calls to the Azure service. Use this when you want to test against the actual service.
+
+### Record Mode (record interactions)
+
+```powershell
+$env:AZURE_TEST_MODE = "RECORD"
+```
+
+This mode makes real HTTP calls and records them for later playback. Recordings are managed by the test-proxy tool and can be pushed to the azure-sdk-assets repository using:
+
+```bash
+test-proxy push -a assets.json
+```
+
+### Playback Mode (use recordings)
+
+```powershell
+$env:AZURE_TEST_MODE = "PLAYBACK"
+```
+
+This mode uses previously recorded HTTP interactions instead of making real calls. This is the default mode and doesn't require credentials.
+
+## Running Tests
+
+### Run All Tests
+
+From the `sdk/transcription/azure-ai-speech-transcription` directory:
+
+```bash
+mvn clean test
+```
+
+### Run Specific Test Class
+
+```bash
+mvn test -Dtest=TranscriptionClientTest
+```
+
+or
+
+```bash
+mvn test -Dtest=TranscriptionAsyncClientTest
+```
+
+### Run a Specific Test Method
+
+```bash
+mvn test -Dtest=TranscriptionClientTest#testTranscribeSyncBasicFromFile
+```
+
+## Test Organization
+
+### Synchronous Tests (TranscriptionClientTest)
+
+Tests for the synchronous `TranscriptionClient` (14 tests), including:
+
+- Basic transcription from file
+- Transcription from URL (using publicly accessible audio URL)
+- Transcription with language specification
+- Transcription with multiple languages
+- Transcription with speaker diarization
+- Transcription with profanity filtering
+- Transcription with word-level timestamps
+- Tests using `transcribeWithResponse()` method
+- Tests with custom RequestOptions
+
+### Asynchronous Tests (TranscriptionAsyncClientTest)
+
+Tests for the asynchronous `TranscriptionAsyncClient` (16 tests), mirroring the synchronous tests but using reactive programming patterns with `Mono` and `Flux`. Includes additional tests for:
+
+- Transcription from URL (using publicly accessible audio URL)
+- Error handling with invalid language codes
+- Placeholder tests for empty audio data and cancellation scenarios
+
+## Authentication
+
+The tests support two authentication methods:
+
+1. **Key-based authentication** (default): Uses the API key from `SPEECH_API_KEY` environment variable
+2. **Token-based authentication**: Uses Entra ID credentials via `DefaultAzureCredential`
+
+To test with token-based authentication, tests call `createClient(false, true, sync)`, where the first parameter (`useKeyAuth = false`) selects `TokenCredential` instead of the API key.
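+
+As a minimal sketch, the two credential types are wired into the client builder like this (mirroring the JavaDoc snippets in this library):
+
+```java
+// Imports assumed: com.azure.ai.speech.transcription.*,
+// com.azure.core.credential.KeyCredential, com.azure.identity.DefaultAzureCredentialBuilder
+
+// Key-based authentication
+TranscriptionClient keyClient = new TranscriptionClientBuilder()
+    .endpoint(System.getenv("SPEECH_ENDPOINT"))
+    .credential(new KeyCredential(System.getenv("SPEECH_API_KEY")))
+    .buildClient();
+
+// Token-based authentication (Entra ID)
+TranscriptionClient entraClient = new TranscriptionClientBuilder()
+    .endpoint(System.getenv("SPEECH_ENDPOINT"))
+    .credential(new DefaultAzureCredentialBuilder().build())
+    .buildClient();
+```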
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Missing environment variables**: Ensure `SPEECH_ENDPOINT` and `SPEECH_API_KEY` are set correctly
+2. **Missing sample audio file**: Make sure you have a `sample.wav` file in the test directory (WAV, MP3, or OGG format, shorter than 2 hours, smaller than 250 MB)
+3. **URL transcription failures**: URL-based transcription requires a specific API key tier that supports this feature. If URL tests fail with 401 errors, verify your Speech resource supports URL transcription.
+4. **Test proxy issues**: If playback tests fail, try running in LIVE or RECORD mode first to regenerate recordings
+5. **Network issues**: Check your network connection and firewall settings
+
+### Enable Detailed Logging
+
+To enable detailed HTTP logging during tests, set the logging level in your `logback-test.xml` or via environment variables:
+
+```powershell
+$env:AZURE_LOG_LEVEL = "verbose"
+```
+
+## Additional Resources
+
+- [Azure SDK for Java Test Documentation](https://github.com/Azure/azure-sdk-for-java/blob/main/sdk/core/azure-core-test/README.md)
+- [TypeSpec Java QuickStart - Adding Tests](https://github.com/Azure/azure-sdk-for-java/wiki/TypeSpec-Java-QuickStart#adding-tests)
+- [Azure Speech Service Documentation](https://learn.microsoft.com/azure/cognitive-services/speech-service/)
+- [Azure SDK for Java Contributing Guide](https://github.com/Azure/azure-sdk-for-java/blob/main/CONTRIBUTING.md)
+
+## Test Coverage
+
+The current tests cover:
+
+- ✅ Client instantiation with different authentication methods
+- ✅ Basic transcription functionality from files
+- ✅ Transcription from publicly accessible URLs
+- ✅ Transcription with various options (language, diarization, profanity filter, timestamps)
+- ✅ Both synchronous and asynchronous clients
+- ✅ Methods with and without `Response` wrappers
+- ✅ Custom RequestOptions and headers
+- ✅ Error handling (invalid language codes)
+
+Areas for future enhancement:
+
+- ⏳ Empty audio data handling (placeholder test exists)
+- ⏳ Cancellation scenarios (placeholder test exists)
+- ⬜ Performance tests
+- ⬜ Concurrent request handling
+- ⬜ Edge cases (very long audio, multiple channels, etc.)
+
+## Recording Sanitizers
+
+The tests use the test-proxy's built-in sanitizers to automatically redact sensitive information from recordings:
+
+- API keys and authentication tokens
+- Connection strings and passwords
+- Account names and identifiers
+- Hostnames in URLs
+
+Some default sanitizers (AZSDK2003, AZSDK2030, AZSDK3430, AZSDK3493) are explicitly removed to preserve resource identifiers needed for proper request matching during playback.
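+
+In `TranscriptionClientTestBase` this is a single call against the interceptor manager, guarded so it only runs in record and playback modes:
+
+```java
+// Sketch of the pattern used in TranscriptionClientTestBase
+if (!interceptorManager.isLiveMode()) {
+    // Keep resource identifiers intact so playback request matching succeeds
+    interceptorManager.removeSanitizers("AZSDK2003", "AZSDK2030", "AZSDK3430", "AZSDK3493");
+}
+```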
+
+## Managing Test Recordings
+
+### Restore recordings from assets repo
+
+```bash
+test-proxy restore -a assets.json
+```
+
+### Push new recordings to assets repo
+
+```bash
+test-proxy push -a assets.json
+```
+
+This creates a new tag in the azure-sdk-assets repository and updates `assets.json` with the new tag reference.
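+
+For reference, an `assets.json` file follows this general shape (the values below are illustrative placeholders, not this library's actual tag):
+
+```json
+{
+  "AssetsRepo": "Azure/azure-sdk-assets",
+  "AssetsRepoPrefixPath": "java",
+  "TagPrefix": "java/transcription/azure-ai-speech-transcription",
+  "Tag": "<tag-created-by-test-proxy-push>"
+}
+```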
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/TranscriptionAsyncClientTest.java b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/TranscriptionAsyncClientTest.java
new file mode 100644
index 000000000000..2d46371ee25d
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/TranscriptionAsyncClientTest.java
@@ -0,0 +1,273 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.speech.transcription;
+
+import com.azure.ai.speech.transcription.models.ProfanityFilterMode;
+import com.azure.ai.speech.transcription.models.TranscriptionDiarizationOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.core.exception.HttpResponseException;
+import com.azure.core.http.HttpHeaderName;
+import com.azure.core.http.rest.RequestOptions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+
+/**
+ * Tests for TranscriptionAsyncClient (asynchronous client).
+ */
+class TranscriptionAsyncClientTest extends TranscriptionClientTestBase {
+
+ private final Boolean sync = false; // All tests in this file use the async client
+
+ /***********************************************************************************
+ *
+ * HAPPY PATH TESTS
+ *
+ ***********************************************************************************/
+
+ @Test
+ public void testTranscribeAsyncBasicFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null);
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithLanguageFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null).setLocales(Arrays.asList("en-US"));
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithMultipleLanguagesFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setLocales(Arrays.asList("en-US", "es-ES", "fr-FR"));
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithDiarizationFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionDiarizationOptions diarizationOptions = new TranscriptionDiarizationOptions().setMaxSpeakers(5);
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setDiarizationOptions(diarizationOptions);
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithProfanityFilterFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setProfanityFilterMode(ProfanityFilterMode.MASKED);
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithChannelsFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null).setActiveChannels(Arrays.asList(0));
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeAsyncAllOptionsFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionDiarizationOptions diarizationOptions = new TranscriptionDiarizationOptions().setMaxSpeakers(5);
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null).setLocales(Arrays.asList("en-US"))
+ .setDiarizationOptions(diarizationOptions)
+ .setProfanityFilterMode(ProfanityFilterMode.MASKED)
+ .setActiveChannels(Arrays.asList(0));
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeAsyncBasicFromFileWithResponse() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null);
+ RequestOptions requestOptions
+ = new RequestOptions().addHeader(HttpHeaderName.fromString("x-custom-header"), "custom-value");
+
+ doTranscription(methodName, sync, true, audioFile, options, requestOptions);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithAllOptionsFromFileWithResponse() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionDiarizationOptions diarizationOptions = new TranscriptionDiarizationOptions().setMaxSpeakers(5);
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setLocales(Arrays.asList("en-US", "es-ES"))
+ .setDiarizationOptions(diarizationOptions)
+ .setProfanityFilterMode(ProfanityFilterMode.REMOVED)
+ .setActiveChannels(Arrays.asList(0, 1));
+
+ RequestOptions requestOptions
+ = new RequestOptions().addHeader(HttpHeaderName.fromString("x-custom-header"), "custom-value")
+ .addQueryParam("test-param", "test-value");
+
+ doTranscription(methodName, sync, true, audioFile, options, requestOptions);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithAudioUrl() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ // Using a publicly accessible sample audio file from Azure samples
+ String audioUrl
+ = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-speech-sdk/master/sampledata/audiofiles/aboutSpeechSdk.wav";
+ TranscriptionOptions options = new TranscriptionOptions(audioUrl).setLocales(Arrays.asList("en-US"));
+
+ // For URL-based transcription, we don't pass the local audio file path
+ doTranscriptionWithUrl(methodName, sync, options);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithProfanityModeMasked() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setProfanityFilterMode(ProfanityFilterMode.MASKED);
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithProfanityModeRemoved() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setProfanityFilterMode(ProfanityFilterMode.REMOVED);
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithProfanityModeTags() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setProfanityFilterMode(ProfanityFilterMode.TAGS);
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ /***********************************************************************************
+ *
+ * ERROR HANDLING TESTS
+ *
+ ***********************************************************************************/
+
+ @Test
+ public void testTranscribeAsyncWithEmptyAudioData() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ // Placeholder: currently runs the normal sample audio through the pipeline.
+ // True empty-audio behavior depends on the service and should be validated against recorded responses.
+ TranscriptionOptions options = new TranscriptionOptions((String) null);
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeAsyncWithInvalidLanguageCode() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ // Use invalid language code to trigger service error
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setLocales(Arrays.asList("invalid-locale-code"));
+
+ // The service should return a 400 error for invalid locale
+ // doTranscription wraps exceptions in RuntimeException, so we catch that
+ try {
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ // Should not reach here - the above should throw an exception
+ throw new AssertionError("Expected RuntimeException with HttpResponseException cause but none was thrown");
+ } catch (RuntimeException e) {
+ // Expected behavior - verify the cause is HttpResponseException with 400 status
+ if (!(e.getCause() instanceof HttpResponseException)) {
+ throw new AssertionError(
+ "Expected RuntimeException cause to be HttpResponseException but got: "
+ + (e.getCause() == null ? "null" : e.getCause().getClass()));
+ }
+ HttpResponseException httpException = (HttpResponseException) e.getCause();
+ if (httpException.getResponse().getStatusCode() != 400) {
+ throw new AssertionError(
+ "Expected 400 status code but got: " + httpException.getResponse().getStatusCode());
+ }
+ }
+ }
+
+ @Test
+ public void testTranscribeAsyncCancellation() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ // Placeholder: runs a normal transcription request; actual cancellation
+ // (e.g. disposing the reactive subscription mid-flight) is not yet exercised.
+ TranscriptionOptions options = new TranscriptionOptions((String) null);
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/TranscriptionClientTest.java b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/TranscriptionClientTest.java
new file mode 100644
index 000000000000..0d9b261ccf44
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/TranscriptionClientTest.java
@@ -0,0 +1,226 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.speech.transcription;
+
+import com.azure.ai.speech.transcription.models.ProfanityFilterMode;
+import com.azure.ai.speech.transcription.models.TranscriptionDiarizationOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.core.http.HttpHeaderName;
+import com.azure.core.http.rest.RequestOptions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/**
+ * Tests for TranscriptionClient (synchronous client).
+ */
+class TranscriptionClientTest extends TranscriptionClientTestBase {
+
+ private final Boolean sync = true; // All tests in this file use the sync client
+
+ /***********************************************************************************
+ *
+ * HAPPY PATH TESTS
+ *
+ ***********************************************************************************/
+
+ @Test
+ public void testTranscribeSyncBasicFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null);
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeSyncWithLanguageFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null).setLocales(Arrays.asList("en-US"));
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeSyncWithMultipleLanguagesFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setLocales(Arrays.asList("en-US", "es-ES", "fr-FR"));
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeSyncWithDiarizationFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionDiarizationOptions diarizationOptions = new TranscriptionDiarizationOptions().setMaxSpeakers(5);
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setDiarizationOptions(diarizationOptions);
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeSyncWithProfanityFilterFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setProfanityFilterMode(ProfanityFilterMode.MASKED);
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeSyncWithChannelsFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null).setActiveChannels(Arrays.asList(0));
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeSyncAllOptionsFromFile() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionDiarizationOptions diarizationOptions = new TranscriptionDiarizationOptions().setMaxSpeakers(5);
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null).setLocales(Arrays.asList("en-US"))
+ .setDiarizationOptions(diarizationOptions)
+ .setProfanityFilterMode(ProfanityFilterMode.MASKED)
+ .setActiveChannels(Arrays.asList(0));
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeSyncBasicFromFileWithResponse() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null);
+ RequestOptions requestOptions
+ = new RequestOptions().addHeader(HttpHeaderName.fromString("x-custom-header"), "custom-value");
+
+ doTranscription(methodName, sync, true, audioFile, options, requestOptions);
+ }
+
+ @Test
+ public void testTranscribeSyncWithAllOptionsFromFileWithResponse() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionDiarizationOptions diarizationOptions = new TranscriptionDiarizationOptions().setMaxSpeakers(5);
+
+ TranscriptionOptions options
+ = new TranscriptionOptions((String) null).setLocales(Arrays.asList("en-US", "es-ES"))
+ .setDiarizationOptions(diarizationOptions)
+ .setProfanityFilterMode(ProfanityFilterMode.REMOVED)
+ .setActiveChannels(Arrays.asList(0, 1));
+
+ RequestOptions requestOptions
+ = new RequestOptions().addHeader(HttpHeaderName.fromString("x-custom-header"), "custom-value")
+ .addQueryParam("test-param", "test-value");
+
+ doTranscription(methodName, sync, true, audioFile, options, requestOptions);
+ }
+
+ @Test
+ public void testTranscribeSyncWithMultipleChannels() {
+ // Test with multiple channel indices
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ TranscriptionOptions options = new TranscriptionOptions((String) null).setActiveChannels(Arrays.asList(0, 1));
+
+ doTranscription(methodName, sync, false, audioFile, options, null);
+ }
+
+ @Test
+ public void testTranscribeSyncWithAudioUrl() {
+ createClient(true, true, sync);
+
+ String methodName = new Object() {
+ }.getClass().getEnclosingMethod().getName();
+
+ // Using a publicly accessible sample audio file from Azure samples
+ String audioUrl
+ = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-speech-sdk/master/sampledata/audiofiles/aboutSpeechSdk.wav";
+ TranscriptionOptions options = new TranscriptionOptions(audioUrl).setLocales(Arrays.asList("en-US"));
+
+ // For URL-based transcription, we don't pass the local audio file path
+ doTranscriptionWithUrl(methodName, sync, options);
+ }
+
+ /***********************************************************************************
+ *
+ * ERROR HANDLING TESTS
+ *
+ ***********************************************************************************/
+
+ @Test
+ public void testTranscribeSyncWithNullOptions() {
+ createClient(true, true, sync);
+
+ // Test that null options throws appropriate exception
+ assertThrows(NullPointerException.class, () -> {
+ getClient().transcribe((TranscriptionOptions) null);
+ }, "Transcribe should throw NullPointerException when options is null");
+ }
+
+ @Test
+ public void testTranscribeSyncWithEmptyAudioData() {
+ createClient(true, true, sync);
+
+ // Test with empty audio data - this should result in a service error
+ // Note: Depending on service behavior, this may throw HttpResponseException
+ // The exact behavior should be validated based on actual service responses
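+ // A hypothetical implementation once service behavior is confirmed:
+ // AudioFileDetails emptyAudio = new AudioFileDetails(BinaryData.fromBytes(new byte[0]));
+ // TranscriptionOptions options = new TranscriptionOptions(emptyAudio);
+ // assertThrows(HttpResponseException.class, () -> getClient().transcribe(options));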
+ }
+
+ @Test
+ public void testTranscribeSyncWithInvalidLanguageCode() {
+ createClient(true, true, sync);
+
+ // Note: This test requires actual service call to verify behavior
+ // In PLAYBACK mode, this would replay the recorded error response
+ // Example implementation:
+ // TranscriptionOptions options = new TranscriptionOptions((String) null)
+ // .setLocales(Arrays.asList("invalid-locale"));
+ // doTranscription(methodName, sync, false, audioFile, options, null);
+ // The service should return an error for invalid locale
+ }
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/TranscriptionClientTestBase.java b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/TranscriptionClientTestBase.java
new file mode 100644
index 000000000000..7a8b8c1ec0f8
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/TranscriptionClientTestBase.java
@@ -0,0 +1,337 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.speech.transcription;
+
+import com.azure.ai.speech.transcription.models.AudioFileDetails;
+import com.azure.ai.speech.transcription.models.TranscriptionOptions;
+import com.azure.ai.speech.transcription.models.TranscriptionResult;
+import com.azure.core.credential.KeyCredential;
+import com.azure.core.credential.TokenCredential;
+import com.azure.core.http.HttpRequest;
+import com.azure.core.http.policy.HttpLogDetailLevel;
+import com.azure.core.http.policy.HttpLogOptions;
+import com.azure.core.http.rest.RequestOptions;
+import com.azure.core.http.rest.Response;
+import com.azure.core.test.TestMode;
+import com.azure.core.test.TestProxyTestBase;
+import com.azure.core.util.BinaryData;
+import com.azure.core.util.Configuration;
+import com.azure.core.util.logging.ClientLogger;
+import com.azure.identity.DefaultAzureCredentialBuilder;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+
+import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Base class for TranscriptionClient tests. Contains helper methods and common test infrastructure.
+ * Supports both API Key (KeyCredential) and Entra ID (TokenCredential) authentication.
+ */
+class TranscriptionClientTestBase extends TestProxyTestBase {
+ private static final ClientLogger LOGGER = new ClientLogger(TranscriptionClientTestBase.class);
+
+ final Boolean printResults = false; // Set to true to print results to console window
+
+ // Sample audio file for testing
+ final String audioFile = "./src/test/java/com/azure/ai/speech/transcription/sample.wav";
+
+ // The clients that will be used for tests
+ private TranscriptionClient client = null;
+ private TranscriptionAsyncClient asyncClient = null;
+
+ /**
+ * Sets up the test resources before each test.
+ */
+ @BeforeEach
+ public void setupTest() {
+ // Reset clients before each test to ensure clean state
+ client = null;
+ asyncClient = null;
+ }
+
+ /**
+ * Cleans up test resources after each test.
+ */
+ @AfterEach
+ public void cleanupTest() {
+ // Clean up any resources if needed
+ // Note: The clients don't require explicit cleanup as they are managed by the test framework
+ }
+
+ /**
+ * Creates a client for testing.
+ *
+ * @param useKeyAuth Whether to use key-based authentication (true) or token-based authentication (false)
+ * @param useRealKey Whether to use a real key from environment variables (true) or a fake key (false).
+ * Only applies when useKeyAuth is true.
+ * @param sync Whether to create a synchronous client (true) or asynchronous client (false)
+ */
+ protected void createClient(Boolean useKeyAuth, Boolean useRealKey, Boolean sync) {
+ TestMode testMode = getTestMode();
+
+ // Define endpoint and auth credentials
+ String endpoint = "https://fake-resource-name.cognitiveservices.azure.com";
+ String key = "00000000000000000000000000000000";
+
+ if (testMode == TestMode.LIVE || testMode == TestMode.RECORD) {
+ endpoint = Configuration.getGlobalConfiguration().get("SPEECH_ENDPOINT");
+ assertTrue(endpoint != null && !endpoint.isEmpty(), "Endpoint URL is required to run live tests.");
+
+ if (useKeyAuth && useRealKey) {
+ key = Configuration.getGlobalConfiguration().get("SPEECH_API_KEY");
+ assertTrue(key != null && !key.isEmpty(), "API key is required to run live tests with KeyCredential.");
+ }
+ }
+
+ // Create the client builder
+ TranscriptionClientBuilder transcriptionClientBuilder = new TranscriptionClientBuilder().endpoint(endpoint)
+ .httpLogOptions(new HttpLogOptions().setLogLevel(HttpLogDetailLevel.BODY_AND_HEADERS));
+
+ // Update the client builder with credentials and recording/playback policies
+ if (getTestMode() == TestMode.LIVE) {
+ if (useKeyAuth) {
+ transcriptionClientBuilder.credential(new KeyCredential(key));
+ } else {
+ // Use Entra ID authentication (TokenCredential)
+ TokenCredential credential = new DefaultAzureCredentialBuilder().build();
+ transcriptionClientBuilder.credential(credential);
+ }
+ } else if (getTestMode() == TestMode.RECORD) {
+ transcriptionClientBuilder.addPolicy(interceptorManager.getRecordPolicy());
+ if (useKeyAuth) {
+ transcriptionClientBuilder.credential(new KeyCredential(key));
+ } else {
+ TokenCredential credential = new DefaultAzureCredentialBuilder().build();
+ transcriptionClientBuilder.credential(credential);
+ }
+ } else if (getTestMode() == TestMode.PLAYBACK) {
+ transcriptionClientBuilder.httpClient(interceptorManager.getPlaybackClient());
+ // In playback mode, use a fake key regardless of authentication method
+ transcriptionClientBuilder.credential(new KeyCredential(key));
+ }
+
+ // Configure sanitizers - must be done after registering the record policy or playback client
+ if (!interceptorManager.isLiveMode()) {
+ // Remove default sanitizers that would interfere with Speech service recordings:
+ // - AZSDK3430 (id sanitizer): Preserve resource identifiers needed for request matching
+ // - AZSDK3493 (name sanitizer): Preserve resource names needed for request matching
+ // - AZSDK2003, AZSDK2030: URI-related sanitizers that may affect Speech endpoints
+ interceptorManager.removeSanitizers("AZSDK2003", "AZSDK2030", "AZSDK3430", "AZSDK3493");
+ }
+
+ if (sync) {
+ client = transcriptionClientBuilder.buildClient();
+ } else {
+ asyncClient = transcriptionClientBuilder.buildAsyncClient();
+ }
+ }
+
+ /**
+ * Performs transcription with audio URL and validates the result.
+ *
+ * @param testName A label that uniquely defines the test. Used in console printout.
+ * @param sync 'true' to use synchronous client, 'false' to use asynchronous client.
+ * @param options TranscriptionOptions with audioUrl set
+ */
+ protected void doTranscriptionWithUrl(String testName, Boolean sync, TranscriptionOptions options) {
+ try {
+ // Verify that audioUrl is set
+ assertNotNull(options.getAudioUrl(), "AudioUrl must be set for URL-based transcription");
+ assertFalse(options.getAudioUrl().isEmpty(), "AudioUrl must not be empty");
+
+ TranscriptionResult result = null;
+ if (sync) {
+ result = client.transcribe(options);
+ } else {
+ result = asyncClient.transcribe(options).block();
+ }
+
+ validateTranscriptionResult(testName, result);
+ } catch (Exception e) {
+ LOGGER.error("Error in test {}: {}", testName, e.getMessage());
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Performs transcription and validates the result.
+ *
+ * @param testName A label that uniquely defines the test. Used in console printout.
+ * @param sync 'true' to use synchronous client, 'false' to use asynchronous client.
+ * @param transcribeWithResponse 'true' to use transcribeWithResponse(), 'false' to use transcribe().
+ * @param audioFilePath Path to the audio file to transcribe
+ * @param options TranscriptionOptions (can be null)
+ * @param requestOptions RequestOptions (can be null)
+ */
+ protected void doTranscription(String testName, Boolean sync, Boolean transcribeWithResponse, String audioFilePath,
+ TranscriptionOptions options, RequestOptions requestOptions) {
+
+ try {
+ // Load audio file
+ byte[] audioData = Files.readAllBytes(Paths.get(audioFilePath));
+ AudioFileDetails audioFileDetails
+ = new AudioFileDetails(BinaryData.fromBytes(audioData)).setFilename(new File(audioFilePath).getName());
+
+ // Rebuild the options around the audio file when they were not created with an audio URL
+ if (options.getAudioUrl() == null) {
+ // Options was created with null, need to create a new one with audio file details
+ options = new TranscriptionOptions(audioFileDetails).setLocales(options.getLocales())
+ .setLocaleModelMapping(options.getLocaleModelMapping())
+ .setProfanityFilterMode(options.getProfanityFilterMode())
+ .setDiarizationOptions(options.getDiarizationOptions())
+ .setActiveChannels(options.getActiveChannels())
+ .setEnhancedModeOptions(options.getEnhancedModeOptions())
+ .setPhraseListOptions(options.getPhraseListOptions());
+ }
+
+ if (sync) {
+ TranscriptionResult result = null;
+ if (!transcribeWithResponse) {
+ result = client.transcribe(options);
+ } else {
+ if (requestOptions == null) {
+ // Use the new transcribeWithResponse(TranscriptionOptions) convenience method
+ Response<TranscriptionResult> response = client.transcribeWithResponse(options);
+ printHttpRequestAndResponse(response);
+ result = response.getValue();
+ } else {
+ // When custom RequestOptions are needed, use the lower-level API
+ BinaryData multipartBody
+ = new com.azure.ai.speech.transcription.implementation.MultipartFormDataHelper(
+ requestOptions)
+ .serializeJsonField("definition", options)
+ .serializeFileField("audio", audioFileDetails.getContent(),
+ audioFileDetails.getContentType(), audioFileDetails.getFilename())
+ .end()
+ .getRequestBody();
+ Response<BinaryData> response = client.transcribeWithResponse(multipartBody, requestOptions);
+ printHttpRequestAndResponse(response);
+ result = response.getValue().toObject(TranscriptionResult.class);
+ }
+ }
+ validateTranscriptionResult(testName, result);
+ } else {
+ TranscriptionResult result = null;
+ if (!transcribeWithResponse) {
+ result = asyncClient.transcribe(options).block();
+ } else {
+ if (requestOptions == null) {
+ // Use the new transcribeWithResponse(TranscriptionOptions) convenience method
+ Response<TranscriptionResult> response = asyncClient.transcribeWithResponse(options).block();
+ printHttpRequestAndResponse(response);
+ result = response.getValue();
+ } else {
+ // When custom RequestOptions are needed, use the lower-level API
+ BinaryData multipartBody
+ = new com.azure.ai.speech.transcription.implementation.MultipartFormDataHelper(
+ requestOptions)
+ .serializeJsonField("definition", options)
+ .serializeFileField("audio", audioFileDetails.getContent(),
+ audioFileDetails.getContentType(), audioFileDetails.getFilename())
+ .end()
+ .getRequestBody();
+ Response<BinaryData> response
+ = asyncClient.transcribeWithResponse(multipartBody, requestOptions).block();
+ printHttpRequestAndResponse(response);
+ result = response.getValue().toObject(TranscriptionResult.class);
+ }
+ }
+ validateTranscriptionResult(testName, result);
+ }
+ } catch (Exception e) {
+ LOGGER.error("Error in test {}: {}", testName, e.getMessage());
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Validates the transcription result.
+ *
+ * @param testName The name of the test
+ * @param result The transcription result to validate
+ */
+ protected void validateTranscriptionResult(String testName, TranscriptionResult result) {
+ if (printResults) {
+ System.out.println("\n===== Test: " + testName + " =====");
+ System.out.println("Duration: " + result.getDuration() + "ms");
+ if (result.getCombinedPhrases() != null) {
+ result.getCombinedPhrases().forEach(phrase -> {
+ System.out.println("Channel " + phrase.getChannel() + ": " + phrase.getText());
+ });
+ }
+ if (result.getPhrases() != null) {
+ result.getPhrases().forEach(phrase -> {
+ System.out.println("Phrase: " + phrase.getText() + " (confidence: " + phrase.getConfidence() + ")");
+ });
+ }
+ }
+
+ // Basic validation
+ assertNotNull(result, "Transcription result should not be null");
+ assertNotNull(result.getDuration(), "Duration should not be null");
+ assertTrue(result.getDuration().toMillis() > 0, "Duration should be greater than 0");
+ assertNotNull(result.getCombinedPhrases(), "Combined phrases should not be null");
+ assertFalse(result.getCombinedPhrases().isEmpty(), "Combined phrases should not be empty");
+ assertNotNull(result.getPhrases(), "Phrases should not be null");
+ assertFalse(result.getPhrases().isEmpty(), "Phrases should not be empty");
+
+ // Validate combined phrases
+ result.getCombinedPhrases().forEach(phrase -> {
+ assertNotNull(phrase.getText(), "Combined phrase text should not be null");
+ assertFalse(phrase.getText().isEmpty(), "Combined phrase text should not be empty");
+ });
+
+ // Validate phrases
+ result.getPhrases().forEach(phrase -> {
+ assertNotNull(phrase.getText(), "Phrase text should not be null");
+ assertFalse(phrase.getText().isEmpty(), "Phrase text should not be empty");
+ assertTrue(phrase.getConfidence() >= 0 && phrase.getConfidence() <= 1,
+ "Confidence should be between 0 and 1");
+ assertTrue(phrase.getOffset().toMillis() >= 0, "Offset should be non-negative");
+ assertTrue(phrase.getDuration().toMillis() > 0, "Phrase duration should be positive");
+ });
+ }
+
+ /**
+ * Prints HTTP request and response details for debugging.
+ *
+ * @param response The HTTP response
+ */
+ protected void printHttpRequestAndResponse(Response<?> response) {
+ if (printResults) {
+ HttpRequest request = response.getRequest();
+ System.out.println("\n===== HTTP Request =====");
+ System.out.println(request.getHttpMethod() + " " + request.getUrl());
+ request.getHeaders().forEach(header -> System.out.println(header.getName() + ": " + header.getValue()));
+
+ System.out.println("\n===== HTTP Response =====");
+ System.out.println("Status Code: " + response.getStatusCode());
+ response.getHeaders().forEach(header -> System.out.println(header.getName() + ": " + header.getValue()));
+ }
+ }
+
+ /**
+ * Gets the synchronous client.
+ *
+ * @return The TranscriptionClient
+ */
+ protected TranscriptionClient getClient() {
+ return client;
+ }
+
+ /**
+ * Gets the asynchronous client.
+ *
+ * @return The TranscriptionAsyncClient
+ */
+ protected TranscriptionAsyncClient getAsyncClient() {
+ return asyncClient;
+ }
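+
+ // Hedged sketch (editorial illustration, not part of the original change): shows how a
+ // concrete test could drive the helper above. The method name is an assumption, the null
+ // options/requestOptions rely on doTranscription's documented null handling, and
+ // sample.wav is the audio file checked in under this test package.
+ protected void exampleTranscribeSampleWav() {
+ // Sync client, plain transcribe(), all options defaulted.
+ doTranscription("exampleTranscribeSampleWavSync", true, false,
+ "src/test/java/com/azure/ai/speech/transcription/sample.wav", null, null);
+ // Async client through transcribeWithResponse(), still with default options.
+ doTranscription("exampleTranscribeSampleWavAsync", false, true,
+ "src/test/java/com/azure/ai/speech/transcription/sample.wav", null, null);
+ }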
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/generated/TranscriptionClientTestBase.java b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/generated/TranscriptionClientTestBase.java
new file mode 100644
index 000000000000..2e70366d4ec1
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/generated/TranscriptionClientTestBase.java
@@ -0,0 +1,41 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.speech.transcription.generated;
+
+// The Java test files under 'generated' package are generated for your reference.
+// If you wish to modify these files, please copy them out of the 'generated' package, and modify there.
+// See https://aka.ms/azsdk/dpg/java/tests for guide on adding a test.
+
+import com.azure.ai.speech.transcription.TranscriptionClient;
+import com.azure.ai.speech.transcription.TranscriptionClientBuilder;
+import com.azure.core.http.policy.HttpLogDetailLevel;
+import com.azure.core.http.policy.HttpLogOptions;
+import com.azure.core.test.TestMode;
+import com.azure.core.test.TestProxyTestBase;
+import com.azure.core.test.utils.MockTokenCredential;
+import com.azure.core.util.Configuration;
+import com.azure.identity.DefaultAzureCredentialBuilder;
+
+class TranscriptionClientTestBase extends TestProxyTestBase {
+ protected TranscriptionClient transcriptionClient;
+
+ @Override
+ protected void beforeTest() {
+ TranscriptionClientBuilder transcriptionClientBuilder = new TranscriptionClientBuilder()
+ .endpoint(Configuration.getGlobalConfiguration().get("ENDPOINT", "endpoint"))
+ .httpClient(getHttpClientOrUsePlayback(getHttpClients().findFirst().orElse(null)))
+ .httpLogOptions(new HttpLogOptions().setLogLevel(HttpLogDetailLevel.BASIC));
+ if (getTestMode() == TestMode.PLAYBACK) {
+ transcriptionClientBuilder.credential(new MockTokenCredential());
+ } else if (getTestMode() == TestMode.RECORD) {
+ transcriptionClientBuilder.addPolicy(interceptorManager.getRecordPolicy())
+ .credential(new DefaultAzureCredentialBuilder().build());
+ } else if (getTestMode() == TestMode.LIVE) {
+ transcriptionClientBuilder.credential(new DefaultAzureCredentialBuilder().build());
+ }
+ transcriptionClient = transcriptionClientBuilder.buildClient();
+ }
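+
+ // Hedged sketch (editorial illustration, not generated code): TestProxyTestBase derives
+ // PLAYBACK/RECORD/LIVE from the AZURE_TEST_MODE environment variable, so a minimal smoke
+ // test over this base only needs the client that beforeTest() builds.
+ void exampleClientIsConfigured() {
+ // PLAYBACK runs against the test proxy with MockTokenCredential; RECORD and LIVE
+ // authenticate with DefaultAzureCredential against the configured ENDPOINT.
+ assert transcriptionClient != null;
+ }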
+}
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/sample.wav b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/sample.wav
new file mode 100644
index 000000000000..bf23d54b0c00
Binary files /dev/null and b/sdk/transcription/azure-ai-speech-transcription/src/test/java/com/azure/ai/speech/transcription/sample.wav differ
diff --git a/sdk/transcription/azure-ai-speech-transcription/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/sdk/transcription/azure-ai-speech-transcription/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker
new file mode 100644
index 000000000000..1f0955d450f0
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker
@@ -0,0 +1 @@
+mock-maker-inline
diff --git a/sdk/transcription/azure-ai-speech-transcription/tsp-location.yaml b/sdk/transcription/azure-ai-speech-transcription/tsp-location.yaml
new file mode 100644
index 000000000000..ed7b497e5fa9
--- /dev/null
+++ b/sdk/transcription/azure-ai-speech-transcription/tsp-location.yaml
@@ -0,0 +1,4 @@
+directory: specification/cognitiveservices/Speech.Transcription
+commit: 67019b34b001ae6b8429ee983b9697465d721d0b
+repo: Azure/azure-rest-api-specs
+additionalDirectories:
diff --git a/sdk/transcription/ci.yml b/sdk/transcription/ci.yml
new file mode 100644
index 000000000000..bbf4e187a7f2
--- /dev/null
+++ b/sdk/transcription/ci.yml
@@ -0,0 +1,46 @@
+# NOTE: Please refer to https://aka.ms/azsdk/engsys/ci-yaml before editing this file.
+
+trigger:
+ branches:
+ include:
+ - main
+ - hotfix/*
+ - release/*
+ paths:
+ include:
+ - sdk/transcription/ci.yml
+ - sdk/transcription/azure-ai-speech-transcription/
+ exclude:
+ - sdk/transcription/pom.xml
+ - sdk/transcription/azure-ai-speech-transcription/pom.xml
+
+pr:
+ branches:
+ include:
+ - main
+ - feature/*
+ - hotfix/*
+ - release/*
+ paths:
+ include:
+ - sdk/transcription/ci.yml
+ - sdk/transcription/azure-ai-speech-transcription/
+ exclude:
+ - sdk/transcription/pom.xml
+ - sdk/transcription/azure-ai-speech-transcription/pom.xml
+
+parameters:
+ - name: release_azureaispeechtranscription
+ displayName: "azure-ai-speech-transcription"
+ type: boolean
+ default: true
+
+extends:
+ template: ../../eng/pipelines/templates/stages/archetype-sdk-client.yml
+ parameters:
+ ServiceDirectory: transcription
+ Artifacts:
+ - name: azure-ai-speech-transcription
+ groupId: com.azure
+ safeName: azureaispeechtranscription
+ releaseInBatch: ${{ parameters.release_azureaispeechtranscription }}
diff --git a/sdk/transcription/pom.xml b/sdk/transcription/pom.xml
new file mode 100644
index 000000000000..5736ddc377da
--- /dev/null
+++ b/sdk/transcription/pom.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>com.azure</groupId>
+  <artifactId>azure-transcription-service</artifactId>
+  <packaging>pom</packaging>
+  <version>1.0.0</version>
+
+  <modules>
+    <module>azure-ai-speech-transcription</module>
+  </modules>
+</project>