Skip to content

Commit 8dcfe8b

Browse files
dtmeadows authored and stainless-app[bot] committed
copy over translations non-streaming + json def from async
1 parent e01f14b commit 8dcfe8b

File tree

1 file changed

+64
-2
lines changed

1 file changed

+64
-2
lines changed

src/openai/resources/audio/transcriptions.py

Lines changed: 64 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,10 @@ def create(
6969
model: Union[str, AudioModel],
7070
chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | Omit = omit,
7171
include: List[TranscriptionInclude] | Omit = omit,
72-
response_format: Union[Literal["json"], Omit] = omit,
7372
language: str | Omit = omit,
7473
prompt: str | Omit = omit,
74+
response_format: Union[Literal["json"], Omit] = omit,
75+
stream: Optional[Literal[False]] | Omit = omit,
7576
temperature: float | Omit = omit,
7677
timestamp_granularities: List[Literal["word", "segment"]] | Omit = omit,
7778
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -80,7 +81,68 @@ def create(
8081
extra_query: Query | None = None,
8182
extra_body: Body | None = None,
8283
timeout: float | httpx.Timeout | None | NotGiven = not_given,
83-
) -> Transcription: ...
84+
) -> TranscriptionCreateResponse:
85+
"""
86+
Transcribes audio into the input language.
87+
88+
Args:
89+
file:
90+
The audio file object (not file name) to transcribe, in one of these formats:
91+
flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
92+
93+
model: ID of the model to use. The options are `gpt-4o-transcribe`,
94+
`gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
95+
Whisper V2 model).
96+
97+
chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
98+
first normalizes loudness and then uses voice activity detection (VAD) to choose
99+
boundaries. `server_vad` object can be provided to tweak VAD detection
100+
parameters manually. If unset, the audio is transcribed as a single block.
101+
102+
include: Additional information to include in the transcription response. `logprobs` will
103+
return the log probabilities of the tokens in the response to understand the
104+
model's confidence in the transcription. `logprobs` only works with
105+
response_format set to `json` and only with the models `gpt-4o-transcribe` and
106+
`gpt-4o-mini-transcribe`.
107+
108+
language: The language of the input audio. Supplying the input language in
109+
[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
110+
format will improve accuracy and latency.
111+
112+
prompt: An optional text to guide the model's style or continue a previous audio
113+
segment. The
114+
[prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
115+
should match the audio language.
116+
117+
response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
118+
`verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
119+
the only supported format is `json`.
120+
121+
stream: If set to true, the model response data will be streamed to the client as it is
122+
generated using
123+
[server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
124+
See the
125+
[Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
126+
for more information.
127+
128+
Note: Streaming is not supported for the `whisper-1` model and will be ignored.
129+
130+
temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
131+
output more random, while lower values like 0.2 will make it more focused and
132+
deterministic. If set to 0, the model will use
133+
[log probability](https://en.wikipedia.org/wiki/Log_probability) to
134+
automatically increase the temperature until certain thresholds are hit.
135+
136+
timestamp_granularities: The timestamp granularities to populate for this transcription.
137+
`response_format` must be set `verbose_json` to use timestamp granularities.
138+
Either or both of these options are supported: `word`, or `segment`. Note: There
139+
is no additional latency for segment timestamps, but generating word timestamps
140+
incurs additional latency.
141+
142+
extra_headers: Send extra headers
143+
144+
extra_query: Add additional query parameters to the request
145+
"""
84146

85147
@overload
86148
def create(

0 commit comments

Comments
 (0)