@@ -69,9 +69,10 @@ def create(
6969        model : Union [str , AudioModel ],
7070        chunking_strategy : Optional [transcription_create_params .ChunkingStrategy ] |  Omit  =  omit ,
7171        include : List [TranscriptionInclude ] |  Omit  =  omit ,
72-         response_format : Union [Literal ["json" ], Omit ] =  omit ,
7372        language : str  |  Omit  =  omit ,
7473        prompt : str  |  Omit  =  omit ,
74+         response_format : Union [Literal ["json" ], Omit ] =  omit ,
75+         stream : Optional [Literal [False ]] |  Omit  =  omit ,
7576        temperature : float  |  Omit  =  omit ,
7677        timestamp_granularities : List [Literal ["word" , "segment" ]] |  Omit  =  omit ,
7778        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
@@ -80,7 +81,68 @@ def create(
8081        extra_query : Query  |  None  =  None ,
8182        extra_body : Body  |  None  =  None ,
8283        timeout : float  |  httpx .Timeout  |  None  |  NotGiven  =  not_given ,
83-     ) ->  Transcription : ...
84+     ) ->  TranscriptionCreateResponse :
85+         """ 
86+         Transcribes audio into the input language. 
87+ 
88+         Args: 
89+           file: 
90+               The audio file object (not file name) to transcribe, in one of these formats: 
91+               flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. 
92+ 
93+           model: ID of the model to use. The options are `gpt-4o-transcribe`, 
94+               `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source 
95+               Whisper V2 model). 
96+ 
97+           chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server 
98+               first normalizes loudness and then uses voice activity detection (VAD) to choose 
99+               boundaries. `server_vad` object can be provided to tweak VAD detection 
100+               parameters manually. If unset, the audio is transcribed as a single block. 
101+ 
102+           include: Additional information to include in the transcription response. `logprobs` will 
103+               return the log probabilities of the tokens in the response to understand the 
104+               model's confidence in the transcription. `logprobs` only works with 
105+               response_format set to `json` and only with the models `gpt-4o-transcribe` and 
106+               `gpt-4o-mini-transcribe`. 
107+ 
108+           language: The language of the input audio. Supplying the input language in 
109+               [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) 
110+               format will improve accuracy and latency. 
111+ 
112+           prompt: An optional text to guide the model's style or continue a previous audio 
113+               segment. The 
114+               [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) 
115+               should match the audio language. 
116+ 
117+           response_format: The format of the output, in one of these options: `json`, `text`, `srt`, 
118+               `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, 
119+               the only supported format is `json`. 
120+ 
121+           stream: If set to true, the model response data will be streamed to the client as it is 
122+               generated using 
123+               [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). 
124+               See the 
125+               [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions) 
126+               for more information. 
127+ 
128+               Note: Streaming is not supported for the `whisper-1` model and will be ignored. 
129+ 
130+           temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the 
131+               output more random, while lower values like 0.2 will make it more focused and 
132+               deterministic. If set to 0, the model will use 
133+               [log probability](https://en.wikipedia.org/wiki/Log_probability) to 
134+               automatically increase the temperature until certain thresholds are hit. 
135+ 
136+           timestamp_granularities: The timestamp granularities to populate for this transcription. 
137+               `response_format` must be set to `verbose_json` to use timestamp granularities. 
138+               Either or both of these options are supported: `word`, or `segment`. Note: There 
139+               is no additional latency for segment timestamps, but generating word timestamps 
140+               incurs additional latency. 
141+ 
142+           extra_headers: Send extra headers 
143+ 
144+           extra_query: Add additional query parameters to the request 
145+         """ 
84146
85147    @overload  
86148    def  create (
0 commit comments