Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

Add Audio APIs #292

Merged
merged 3 commits into from
Jun 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package com.theokanning.openai.audio;

import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.*;

/**
* Options for an OpenAI audio transcription request.
* Only {@code model} is required; every other field may be left null.
* This class has no field for the audio itself.
*
* NOTE(review): {@code @NoArgsConstructor} yields an instance whose {@code model} is still null
* even though the field is marked {@code @NonNull} - presumably Lombok only null-checks in the
* all-args constructor, builder and setter; confirm callers always set a model.
*
* https://platform.openai.com/docs/api-reference/audio/create
*/
@Builder
@NoArgsConstructor
@AllArgsConstructor
@Data
public class CreateTranscriptionRequest {

/**
* The name of the model to use. Required.
*/
@NonNull
String model;

/**
* An optional text to guide the model's style or continue a previous audio segment.
* The prompt should match the audio language.
*/
String prompt;

/**
* The format of the transcript output, one of: json or verbose_json.
* Serialized under the JSON property name {@code response_format}.
*/
@JsonProperty("response_format")
String responseFormat;

/**
* The sampling temperature, between 0 and 1.
* Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
* If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
*/
Double temperature;

/**
* The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.
*/
String language;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.theokanning.openai.audio;

import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.*;

/**
* Options for an OpenAI request that translates an audio file into English text.
* Only {@code model} is required; every other field may be left null.
* This class has no field for the audio itself.
*
* NOTE(review): {@code @NoArgsConstructor} yields an instance whose {@code model} is still null
* even though the field is marked {@code @NonNull} - presumably Lombok only null-checks in the
* all-args constructor, builder and setter; confirm callers always set a model.
*
* https://platform.openai.com/docs/api-reference/audio/create
*/
@Builder
@NoArgsConstructor
@AllArgsConstructor
@Data
public class CreateTranslationRequest {

/**
* The name of the model to use. Required.
*/
@NonNull
String model;

/**
* An optional text to guide the model's style or continue a previous audio segment.
* The prompt should be in English.
*/
String prompt;

/**
* The format of the translated output, one of: json or verbose_json.
* Serialized under the JSON property name {@code response_format}.
*/
@JsonProperty("response_format")
String responseFormat;

/**
* The sampling temperature, between 0 and 1.
* Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
* If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
*/
Double temperature;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package com.theokanning.openai.audio;

import lombok.Data;

import java.util.List;

/**
* Response object holding the text transcription of an audio file.
* Only {@code text} is described as always present; the remaining fields are documented
* below as being returned for the verbose_json response format only.
*
* https://platform.openai.com/docs/api-reference/audio/create
*/
@Data
public class TranscriptionResult {

/**
* The text transcription.
*/
String text;

/**
* Task name.
* @apiNote verbose_json response format only
*/
String task;

/**
* Speech language.
* @apiNote verbose_json response format only
*/
String language;

/**
* Speech duration (presumably in seconds - TODO confirm against the API reference).
* @apiNote verbose_json response format only
*/
Double duration;

/**
* List of segments that make up the transcription.
* @apiNote verbose_json response format only
*/
List<TranscriptionSegment> segments;

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.theokanning.openai.audio;

import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;

import java.util.List;

/**
* A single segment of a transcription or translation result.
* Fields without an explicit {@code @JsonProperty} are bound by their Java field name.
*
* https://platform.openai.com/docs/api-reference/audio/create
*/
@Data
public class TranscriptionSegment {

// Segment identifier (presumably the index of the segment within the result - TODO confirm).
Integer id;
// Seek value reported by the API (meaning not shown here - TODO confirm).
Integer seek;
// Segment start (presumably an offset in seconds - TODO confirm).
Double start;
// Segment end (presumably an offset in seconds - TODO confirm).
Double end;
// Text of this segment.
String text;
// Integer token values reported for this segment.
List<Integer> tokens;
// Temperature reported for this segment.
Double temperature;
// Bound to the JSON property "avg_logprob".
@JsonProperty("avg_logprob")
Double averageLogProb;
// Bound to the JSON property "compression_ratio".
@JsonProperty("compression_ratio")
Double compressionRatio;
// Bound to the JSON property "no_speech_prob".
@JsonProperty("no_speech_prob")
Double noSpeechProb;
// Bound to the JSON property "transient"; the field cannot use that name because
// "transient" is a reserved Java keyword.
@JsonProperty("transient")
Boolean transientFlag;

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package com.theokanning.openai.audio;

import lombok.Data;

import java.util.List;

/**
* Response object holding the English translation of an audio file.
* Only {@code text} is described as always present; the remaining fields are documented
* below as being returned for the verbose_json response format only.
*
* https://platform.openai.com/docs/api-reference/audio/create
*/
@Data
public class TranslationResult {

/**
* Translated text.
*/
String text;

/**
* Task name.
* @apiNote verbose_json response format only
*/
String task;

/**
* Translated language.
* @apiNote verbose_json response format only
*/
String language;

/**
* Speech duration (presumably in seconds - TODO confirm against the API reference).
* @apiNote verbose_json response format only
*/
Double duration;

/**
* List of segments that make up the translation.
* @apiNote verbose_json response format only
*/
List<TranscriptionSegment> segments;

}
4 changes: 4 additions & 0 deletions api/src/test/java/com/theokanning/openai/JsonTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.*;
import com.theokanning.openai.audio.TranscriptionResult;
import com.theokanning.openai.audio.TranslationResult;
import com.theokanning.openai.completion.chat.ChatCompletionRequest;
import com.theokanning.openai.completion.chat.ChatCompletionResult;
import com.theokanning.openai.edit.EditRequest;
Expand Down Expand Up @@ -42,6 +44,8 @@ public class JsonTest {
FineTuneEvent.class,
FineTuneResult.class,
ImageResult.class,
TranscriptionResult.class,
TranslationResult.class,
Model.class,
ModerationRequest.class,
ModerationResult.class
Expand Down
27 changes: 27 additions & 0 deletions api/src/test/resources/fixtures/TranscriptionResult.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"task": "transcribe",
"language": "english",
"duration": 1.1,
"segments": [
{
"id": 0,
"seek": 0,
"start": 0.0,
"end": 0.96,
"text": " Hello World.",
"tokens": [
50364,
2425,
3937,
13,
50412
],
"temperature": 0.0,
"avg_logprob": -0.7308251063028971,
"compression_ratio": 0.6,
"no_speech_prob": 0.015335720032453537,
"transient": false
}
],
"text": "Hello World."
}
37 changes: 37 additions & 0 deletions api/src/test/resources/fixtures/TranslationResult.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"task": "translate",
"language": "english",
"duration": 4.38,
"segments": [
{
"id": 0,
"seek": 0,
"start": 0.0,
"end": 4.32,
"text": " Hello, my name is Yuna. I am Korean voice.",
"tokens": [
50364,
2425,
11,
452,
1315,
307,
398,
5051,
13,
286,
669,
6933,
3177,
13,
50580
],
"temperature": 0.0,
"avg_logprob": -0.6644304394721985,
"compression_ratio": 0.84,
"no_speech_prob": 0.006824055220931768,
"transient": false
}
],
"text": "Hello, my name is Yuna. I am Korean voice."
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
import com.theokanning.openai.DeleteResult;
import com.theokanning.openai.OpenAiError;
import com.theokanning.openai.OpenAiHttpException;
import com.theokanning.openai.audio.CreateTranscriptionRequest;
import com.theokanning.openai.audio.CreateTranslationRequest;
import com.theokanning.openai.audio.TranscriptionResult;
import com.theokanning.openai.audio.TranslationResult;
import com.theokanning.openai.client.OpenAiApi;
import com.theokanning.openai.completion.CompletionChunk;
import com.theokanning.openai.completion.CompletionRequest;
Expand Down Expand Up @@ -248,6 +252,61 @@ public ImageResult createImageVariation(CreateImageVariationRequest request, jav
return execute(api.createImageVariation(builder.build()));
}

/**
 * Transcribes the audio file located at the given path.
 * Wraps the path in a {@link java.io.File} and delegates to the file-based overload.
 *
 * @param request   transcription options
 * @param audioPath path of the audio file to upload
 * @return the transcription returned by the file-based overload
 */
public TranscriptionResult createTranscription(CreateTranscriptionRequest request, String audioPath) {
    return createTranscription(request, new java.io.File(audioPath));
}

/**
 * Uploads an audio file for transcription as a multipart form request.
 *
 * <p>The "model" and "file" parts are always sent. The "prompt", "response_format",
 * "temperature" and "language" parts are added only when the matching request field is non-null.
 *
 * @param request transcription options; its model is sent as the "model" part
 * @param audio   audio file to upload; its name is used as the multipart filename
 * @return the result of executing the transcription call
 */
public TranscriptionResult createTranscription(CreateTranscriptionRequest request, java.io.File audio) {
    // The previous literal "audio" has no subtype, so MediaType.parse yielded null and the
    // file part was sent without a Content-Type. Use a well-formed generic binary type.
    RequestBody audioBody = RequestBody.create(MediaType.get("application/octet-stream"), audio);

    MultipartBody.Builder builder = new MultipartBody.Builder()
            .setType(MultipartBody.FORM)
            .addFormDataPart("model", request.getModel())
            .addFormDataPart("file", audio.getName(), audioBody);

    // Optional fields are omitted entirely when unset so the API applies its own defaults.
    if (request.getPrompt() != null) {
        builder.addFormDataPart("prompt", request.getPrompt());
    }
    if (request.getResponseFormat() != null) {
        builder.addFormDataPart("response_format", request.getResponseFormat());
    }
    if (request.getTemperature() != null) {
        builder.addFormDataPart("temperature", request.getTemperature().toString());
    }
    if (request.getLanguage() != null) {
        builder.addFormDataPart("language", request.getLanguage());
    }

    return execute(api.createTranscription(builder.build()));
}

/**
 * Translates the audio file located at the given path.
 * Wraps the path in a {@link java.io.File} and delegates to the file-based overload.
 *
 * @param request   translation options
 * @param audioPath path of the audio file to upload
 * @return the translation returned by the file-based overload
 */
public TranslationResult createTranslation(CreateTranslationRequest request, String audioPath) {
    return createTranslation(request, new java.io.File(audioPath));
}

/**
 * Uploads an audio file for translation as a multipart form request.
 *
 * <p>The "model" and "file" parts are always sent. The "prompt", "response_format" and
 * "temperature" parts are added only when the matching request field is non-null.
 *
 * @param request translation options; its model is sent as the "model" part
 * @param audio   audio file to upload; its name is used as the multipart filename
 * @return the result of executing the translation call
 */
public TranslationResult createTranslation(CreateTranslationRequest request, java.io.File audio) {
    // The previous literal "audio" has no subtype, so MediaType.parse yielded null and the
    // file part was sent without a Content-Type. Use a well-formed generic binary type.
    RequestBody audioBody = RequestBody.create(MediaType.get("application/octet-stream"), audio);

    MultipartBody.Builder builder = new MultipartBody.Builder()
            .setType(MultipartBody.FORM)
            .addFormDataPart("model", request.getModel())
            .addFormDataPart("file", audio.getName(), audioBody);

    // Optional fields are omitted entirely when unset so the API applies its own defaults.
    if (request.getPrompt() != null) {
        builder.addFormDataPart("prompt", request.getPrompt());
    }
    if (request.getResponseFormat() != null) {
        builder.addFormDataPart("response_format", request.getResponseFormat());
    }
    if (request.getTemperature() != null) {
        builder.addFormDataPart("temperature", request.getTemperature().toString());
    }

    return execute(api.createTranslation(builder.build()));
}

/**
 * Submits a moderation request by passing it to {@code api.createModeration} and running the
 * resulting call through {@code execute}.
 *
 * @param request the moderation request to send
 * @return the moderation result produced by {@code execute}
 */
public ModerationResult createModeration(ModerationRequest request) {
return execute(api.createModeration(request));
}
Expand Down
Loading