|
1 | | -import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks/src/tasks/text-generation/inference"; |
2 | 1 | import { InferenceOutputError } from "../../lib/InferenceOutputError"; |
3 | 2 | import type { BaseArgs, Options } from "../../types"; |
4 | 3 | import { request } from "../custom/request"; |
5 | 4 |
|
| 5 | +/** |
| 6 | + * Inputs for Text Generation inference |
| 7 | + */ |
| 8 | +export interface TextGenerationInput { |
| 9 | + /** |
| 10 | + * The text to initialize generation with |
| 11 | + */ |
| 12 | + inputs: string; |
| 13 | + /** |
| 14 | + * Additional inference parameters |
| 15 | + */ |
| 16 | + parameters?: TextGenerationParameters; |
| 17 | + /** |
| 18 | + * Whether to stream output tokens |
| 19 | + */ |
| 20 | + stream?: boolean; |
| 21 | + [property: string]: unknown; |
| 22 | +} |
| 23 | + |
| 24 | +/** |
| 25 | + * Additional inference parameters |
| 26 | + * |
| 27 | + * Additional inference parameters for Text Generation |
| 28 | + */ |
| 29 | +export interface TextGenerationParameters { |
| 30 | + /** |
| 31 | + * The number of sampling queries to run. Only the best one (in terms of total logprob) will |
| 32 | + * be returned. |
| 33 | + */ |
| 34 | + best_of?: number; |
| 35 | + /** |
| 36 | + * Whether or not to output decoder input details |
| 37 | + */ |
| 38 | + decoder_input_details?: boolean; |
| 39 | + /** |
| 40 | + * Whether or not to output details |
| 41 | + */ |
| 42 | + details?: boolean; |
| 43 | + /** |
| 44 | + * Whether to use logits sampling instead of greedy decoding when generating new tokens. |
| 45 | + */ |
| 46 | + do_sample?: boolean; |
| 47 | + /** |
| 48 | + * The maximum number of tokens to generate. |
| 49 | + */ |
| 50 | + max_new_tokens?: number; |
| 51 | + /** |
| 52 | + * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this |
| 53 | + * paper](https://hf.co/papers/1909.05858) for more details. |
| 54 | + */ |
| 55 | + repetition_penalty?: number; |
| 56 | + /** |
| 57 | + * Whether to prepend the prompt to the generated text. |
| 58 | + */ |
| 59 | + return_full_text?: boolean; |
| 60 | + /** |
| 61 | + * The random sampling seed. |
| 62 | + */ |
| 63 | + seed?: number; |
| 64 | + /** |
| 65 | + * Stop generating tokens if a member of `stop_sequences` is generated. |
| 66 | + */ |
| 67 | + stop_sequences?: string[]; |
| 68 | + /** |
| 69 | + * The value used to modulate the logits distribution. |
| 70 | + */ |
| 71 | + temperature?: number; |
| 72 | + /** |
| 73 | + * The number of highest probability vocabulary tokens to keep for top-k-filtering. |
| 74 | + */ |
| 75 | + top_k?: number; |
| 76 | + /** |
| 77 | + * If set to < 1, only the smallest set of most probable tokens with probabilities that add |
| 78 | + * up to `top_p` or higher are kept for generation. |
| 79 | + */ |
| 80 | + top_p?: number; |
| 81 | + /** |
| 82 | + * Truncate input tokens to the given size. |
| 83 | + */ |
| 84 | + truncate?: number; |
| 85 | + /** |
| 86 | + * Typical Decoding mass. See [Typical Decoding for Natural Language |
| 87 | + * Generation](https://hf.co/papers/2202.00666) for more information |
| 88 | + */ |
| 89 | + typical_p?: number; |
| 90 | + /** |
| 91 | + * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226) |
| 92 | + */ |
| 93 | + watermark?: boolean; |
| 94 | + [property: string]: unknown; |
| 95 | +} |
| 96 | + |
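For reference, TextGenerationInput and TextGenerationParameters together describe the request payload. A minimal sketch of one such payload, with purely illustrative values, could look like:

// Illustrative payload only; prompt text and parameter values are placeholders.
const payload: TextGenerationInput = {
	inputs: "The quick brown fox",
	parameters: {
		max_new_tokens: 40,
		temperature: 0.7,
		top_p: 0.9,
		stop_sequences: ["\n\n"],
		return_full_text: false,
	},
};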
| 97 | +/** |
| 98 | + * Outputs for Text Generation inference |
| 99 | + */ |
| 100 | +export interface TextGenerationOutput { |
| 101 | + /** |
| 102 | + * When enabled, details about the generation |
| 103 | + */ |
| 104 | + details?: TextGenerationOutputDetails; |
| 105 | + /** |
| 106 | + * The generated text |
| 107 | + */ |
| 108 | + generated_text: string; |
| 109 | + [property: string]: unknown; |
| 110 | +} |
| 111 | + |
| 112 | +/** |
| 113 | + * When enabled, details about the generation |
| 114 | + */ |
| 115 | +export interface TextGenerationOutputDetails { |
| 116 | + /** |
| 117 | + * Details about additional sequences when best_of is provided |
| 118 | + */ |
| 119 | + best_of_sequences?: TextGenerationOutputSequenceDetails[]; |
| 120 | + /** |
| 121 | + * The reason why the generation was stopped. |
| 122 | + */ |
| 123 | + finish_reason: TextGenerationFinishReason; |
| 124 | + /** |
| 125 | + * The number of generated tokens |
| 126 | + */ |
| 127 | + generated_tokens: number; |
| 128 | + prefill: TextGenerationPrefillToken[]; |
| 129 | + /** |
| 130 | + * The random seed used for generation |
| 131 | + */ |
| 132 | + seed?: number; |
| 133 | + /** |
| 134 | + * The generated tokens and associated details |
| 135 | + */ |
| 136 | + tokens: TextGenerationOutputToken[]; |
| 137 | + /** |
| 138 | + * Most likely tokens |
| 139 | + */ |
| 140 | + top_tokens?: Array<TextGenerationOutputToken[]>; |
| 141 | + [property: string]: unknown; |
| 142 | +} |
| 143 | + |
| 144 | +export interface TextGenerationOutputSequenceDetails { |
| 145 | + finish_reason: TextGenerationFinishReason; |
| 146 | + /** |
| 147 | + * The generated text |
| 148 | + */ |
| 149 | + generated_text: string; |
| 150 | + /** |
| 151 | + * The number of generated tokens |
| 152 | + */ |
| 153 | + generated_tokens: number; |
| 154 | + prefill: TextGenerationPrefillToken[]; |
| 155 | + /** |
| 156 | + * The random seed used for generation |
| 157 | + */ |
| 158 | + seed?: number; |
| 159 | + /** |
| 160 | + * The generated tokens and associated details |
| 161 | + */ |
| 162 | + tokens: TextGenerationOutputToken[]; |
| 163 | + /** |
| 164 | + * Most likely tokens |
| 165 | + */ |
| 166 | + top_tokens?: Array<TextGenerationOutputToken[]>; |
| 167 | + [property: string]: unknown; |
| 168 | +} |
| 169 | + |
| 170 | +export interface TextGenerationPrefillToken { |
| 171 | + id: number; |
| 172 | + logprob: number; |
| 173 | + /** |
| 174 | + * The text associated with that token |
| 175 | + */ |
| 176 | + text: string; |
| 177 | + [property: string]: unknown; |
| 178 | +} |
| 179 | + |
| 180 | +/** |
| 181 | + * Generated token. |
| 182 | + */ |
| 183 | +export interface TextGenerationOutputToken { |
| 184 | + id: number; |
| 185 | + logprob?: number; |
| 186 | + /** |
| 187 | + * Whether or not that token is a special one |
| 188 | + */ |
| 189 | + special: boolean; |
| 190 | + /** |
| 191 | + * The text associated with that token |
| 192 | + */ |
| 193 | + text: string; |
| 194 | + [property: string]: unknown; |
| 195 | +} |
| 196 | + |
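Because logprob is optional on TextGenerationOutputToken and special tokens rarely carry a meaningful probability, a small helper (illustrative only, not part of this change) can accumulate the log-probability of the reported tokens:

// Illustrative helper: sum the log-probabilities of non-special tokens that report one.
function sequenceLogprob(tokens: TextGenerationOutputToken[]): number {
	return tokens
		.filter((t) => !t.special && typeof t.logprob === "number")
		.reduce((sum, t) => sum + (t.logprob ?? 0), 0);
}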
| 197 | +/** |
| 198 | + * The reason why the generation was stopped. |
| 199 | + * |
| 200 | + * length: The generated sequence reached the maximum allowed length |
| 201 | + * |
| 202 | + * eos_token: The model generated an end-of-sequence (EOS) token
| 203 | + * |
| 204 | + * stop_sequence: One of the sequences in stop_sequences was generated
| 205 | + */ |
| 206 | +export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence"; |
| 207 | + |
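The details field on TextGenerationOutput is only populated when details are requested in the parameters, so consumers should guard for it. A hedged sketch of reading the finish reason, assuming only the shapes declared above:

// Illustrative only: summarize why generation stopped, based on the types above.
function describeStop(output: TextGenerationOutput): string {
	if (!output.details) {
		return "no details returned (request them via parameters.details)";
	}
	const { finish_reason, generated_tokens } = output.details;
	if (finish_reason === "length") {
		return `stopped after ${generated_tokens} tokens (max_new_tokens reached)`;
	}
	if (finish_reason === "eos_token") {
		return "the model emitted its end-of-sequence token";
	}
	return "a configured stop sequence was generated";
}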
6 | 208 | /** |
7 | 209 | * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with). |
8 | 210 | */ |
|
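Assuming the textGeneration() helper this JSDoc documents keeps its usual BaseArgs & TextGenerationInput signature and remains exported from @huggingface/inference, a typical call might look like the sketch below (access token and prompt are placeholders; gpt2 is the model recommended above):

import { textGeneration } from "@huggingface/inference";

// Sketch only: placeholder token and prompt; assumes an ES module context for top-level await.
const output = await textGeneration({
	accessToken: "hf_...",
	model: "gpt2",
	inputs: "Can you please let us know more details about your ",
	parameters: { max_new_tokens: 30 },
});
console.log(output.generated_text);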