-
Notifications
You must be signed in to change notification settings - Fork 4
/
audio.go
39 lines (35 loc) · 1.76 KB
/
audio.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
package openai
import (
"context"
"os"
"github.com/fabiustech/openai/audio"
"github.com/fabiustech/openai/models"
)
// AudioTranscriptionRequest is the request body for the audio/transcriptions endpoint.
//
// Prompt, ResponseFormat, Temperature, and Language are pointer-typed.
// NOTE(review): a nil pointer presumably means the field is omitted from the request so the API default applies —
// confirm against postAudio, which is not visible in this file.
type AudioTranscriptionRequest struct {
// File is the audio file object (not file name) to transcribe, in one of these formats:
// mp3, mp4, mpeg, mpga, m4a, wav, or webm.
// NOTE(review): who is responsible for closing the file is not shown here — confirm with the caller contract.
File *os.File
// Model is the ID of the model to use. Only whisper-1 is currently available.
Model models.Audio
// Prompt is optional text to guide the model's style or continue a previous audio segment. The prompt should match
// the audio language.
Prompt *string
// ResponseFormat is the format of the transcript output, in one of these options:
// json, text, srt, verbose_json, or vtt.
ResponseFormat *audio.Format
// Temperature is the sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random,
// while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log
// probability to automatically increase the temperature until certain thresholds are hit.
Temperature *float64
// Language is the language of the input audio. Supplying the input language in ISO-639-1 format will improve
// accuracy and latency.
Language *string
}
// TranscribeAudioFile submits ar as a new audio file transcription request by delegating to c.postAudio.
//
// File uploads are currently limited to 25 MB, and the supported input file types are:
// mp3, mp4, mpeg, mpga, m4a, wav, and webm.
//
// The result is returned as a raw []byte rather than a decoded value, because the shape of the API response
// changes depending on the contents of the request. The byte slice and error are passed through from
// c.postAudio unchanged.
func (c *Client) TranscribeAudioFile(ctx context.Context, ar *AudioTranscriptionRequest) ([]byte, error) {
	raw, err := c.postAudio(ctx, ar)
	return raw, err
}