Make all outputs in "streaming" format
Doing this to make it easier for Ryan to parse a single, unified output format, regardless of whether the model parser actually supports streaming.

- moved `runPromptStream` -> `runPrompt`, overriding the old definition of `runPrompt`; the unified callback now takes an `enableStreaming` flag (see the sketch after this list)
- still relying on the `isStreamingSupported()` helper (renamed to `getModelSettingsStream()` in this diff) to set the `enableStreaming` param
- defaulted the `stream` param to `True` on the backend server (this has no effect for non-streaming models like Dall-E)
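
For reference, here is a minimal sketch (not the actual implementation) of the unified callback shape on the client. The types are simplified stand-ins, the `/api/run_prompt` path is a placeholder for `ROUTE_TABLE.RUN_PROMPT`, and plain `fetch` stands in for the editor's `streamingApi` helper:

```typescript
// Sketch only: simplified stand-ins for the editor's real types.
type AIConfig = Record<string, unknown>;

type RunPromptStreamEvent =
  | { type: "output_chunk"; data: unknown }
  | { type: "aiconfig"; data: AIConfig };

type RunPromptStreamCallback = (event: RunPromptStreamEvent) => void;

// One callback for every run: the request always goes through the streaming
// endpoint, and `enableStreaming` only controls the `stream` flag in the body.
async function runPrompt(
  promptName: string,
  onStream: RunPromptStreamCallback,
  enableStreaming: boolean = true
): Promise<{ aiconfig?: AIConfig }> {
  const response = await fetch("/api/run_prompt", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ prompt_name: promptName, stream: enableStreaming }),
  });
  // Placeholder: the real client parses the streamed "output_chunk" events and
  // forwards each one to `onStream`; here we only hand back the final config.
  const result = (await response.json()) as { aiconfig?: AIConfig };
  if (result.aiconfig) {
    onStream({ type: "aiconfig", data: result.aiconfig });
  }
  return result;
}
```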

## Test Plan
Test that both streaming and non-streaming settings work as expected, and that a model which does not support streaming (e.g. Dall-E) still works. Now, when the user hasn't explicitly clicked the "stream" setting, it defaults to streaming; however, if they explicitly toggle it off, streaming turns off. As a follow-up, we should have the "stream" button auto-enabled to reflect this (it doesn't have to actually be in the config; the UI should just show it as on by default to match user expectations).
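
In other words, the effective behavior is a simple tri-state. A hypothetical helper (not code in this diff) to illustrate:

```typescript
// Hypothetical helper illustrating the tri-state behavior described above.
// `modelSettingStream` is what the client reads from the prompt/model settings.
function effectiveStream(modelSettingStream: boolean | undefined): boolean {
  // Mirrors request_json.get("stream", True) on the server.
  const SERVER_DEFAULT_STREAM = true;
  return modelSettingStream ?? SERVER_DEFAULT_STREAM;
}

console.log(effectiveStream(undefined)); // true  - user never touched the toggle
console.log(effectiveStream(true));      // true  - user explicitly turned streaming on
console.log(effectiveStream(false));     // false - user explicitly turned streaming off
```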

Update: the default value for `stream` is now `true` inside `OpenAIChatModelParserPromptSchema`, `HuggingFaceTextGenerationParserPromptSchema`, and `AnyscaleEndpointPromptSchema`. Couldn't see it defined in `PaLMTextParserPromptSchema`.
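
Roughly, the `stream` field in those schemas now carries a default along these lines (simplified excerpt, not the full schema):

```typescript
// Simplified excerpt of the updated schema shape (not the full schema).
export const streamField = {
  stream: {
    type: "boolean",
    default: true, // newly added so the UI can treat streaming as on by default
  },
} as const;
```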


https://github.com/lastmile-ai/aiconfig/assets/151060367/34214a66-0cea-4774-a917-9476359f172c
Rossdan Craig rossdan@lastmileai.dev committed Jan 8, 2024
1 parent 54de9cc commit c08f51b
Showing 7 changed files with 51 additions and 68 deletions.
23 changes: 11 additions & 12 deletions python/src/aiconfig/editor/client/src/Editor.tsx
@@ -68,21 +68,22 @@ export default function Editor() {
});
}, []);

const runPrompt = useCallback(async (promptName: string) => {
return await ufetch.post(ROUTE_TABLE.RUN_PROMPT, {
prompt_name: promptName,
});
}, []);

const runPromptStream = useCallback(
async (promptName: string, onStream: RunPromptStreamCallback) => {
await streamingApi(
const runPrompt = useCallback(
async (
promptName: string,
onStream: RunPromptStreamCallback,
enableStreaming: boolean = true
) => {
// Note: We run the streaming API even for
// non-streaming runs so that we can unify
// the way we process data on the client
return await streamingApi<{ aiconfig?: AIConfig }>(
{
url: ROUTE_TABLE.RUN_PROMPT,
method: "POST",
body: {
prompt_name: promptName,
stream: true,
stream: enableStreaming,
},
},
"output_chunk",
@@ -156,7 +157,6 @@ export default function Editor() {
getModels,
getServerStatus,
runPrompt,
runPromptStream,
save,
setConfigDescription,
setConfigName,
@@ -170,7 +170,6 @@
getModels,
getServerStatus,
runPrompt,
runPromptStream,
save,
setConfigDescription,
setConfigName,

@@ -36,10 +36,10 @@ import {
import AddPromptButton from "./prompt/AddPromptButton";
import {
getDefaultNewPromptName,
getModelSettingsStream,
getPrompt,
isStreamingSupported,
} from "../utils/aiconfigStateUtils";
import { debounce, uniqueId } from "lodash";
import { FieldWithPossiblyUndefined, debounce, uniqueId } from "lodash";
import PromptMenuButton from "./prompt/PromptMenuButton";
import GlobalParametersContainer from "./GlobalParametersContainer";
import AIConfigContext from "./AIConfigContext";
@@ -79,11 +79,11 @@ export type AIConfigCallbacks = {
deletePrompt: (promptName: string) => Promise<void>;
getModels: (search: string) => Promise<string[]>;
getServerStatus?: () => Promise<{ status: "OK" | "ERROR" }>;
runPrompt: (promptName: string) => Promise<{ aiconfig: AIConfig }>;
runPromptStream: (
runPrompt: (
promptName: string,
onStream: RunPromptStreamCallback
) => Promise<void>;
onStream: RunPromptStreamCallback,
enableStreaming?: boolean
) => Promise<{ aiconfig?: AIConfig }>;
save: (aiconfig: AIConfig) => Promise<void>;
setConfigDescription: (description: string) => Promise<void>;
setConfigName: (name: string) => Promise<void>;
@@ -544,7 +544,6 @@ export default function EditorContainer({
);

const runPromptCallback = callbacks.runPrompt;
const runPromptStreamCallback = callbacks.runPromptStream;

const onRunPrompt = useCallback(
async (promptId: string) => {
@@ -562,10 +561,14 @@
}

const promptName = statePrompt.name;
const isStream = isStreamingSupported(statePrompt, stateRef.current);
const enableStreaming: boolean | undefined = getModelSettingsStream(
statePrompt!,
stateRef.current
);

if (isStream) {
await runPromptStreamCallback(promptName, (event) => {
const serverConfigResponse = await runPromptCallback(
promptName,
(event) => {
if (event.type === "output_chunk") {
dispatch({
type: "STREAM_OUTPUT_CHUNK",
@@ -579,15 +582,15 @@
config: event.data,
});
}
});
return;
} else {
const serverConfigRes = await runPromptCallback(promptName);
},
enableStreaming
);

if (serverConfigResponse.aiconfig) {
dispatch({
type: "CONSOLIDATE_AICONFIG",
action,
config: serverConfigRes.aiconfig,
config: serverConfigResponse.aiconfig,
});
}
} catch (err: unknown) {
@@ -609,7 +612,7 @@
});
}
},
[runPromptCallback, runPromptStreamCallback]
[runPromptCallback]
);

const setNameCallback = callbacks.setConfigName;

@@ -40,6 +40,7 @@ export const AnyscaleEndpointPromptSchema: PromptSchema = {
},
stream: {
type: "boolean",
default: true,
description: `If true, send messages token by token. If false, messages send in bulk.`,
},
temperature: {

@@ -32,6 +32,7 @@ export const HuggingFaceTextGenerationParserPromptSchema: PromptSchema = {
},
stream: {
type: "boolean",
default: true,
},
do_sample: {
type: "boolean",

@@ -104,6 +104,7 @@ export const OpenAIChatModelParserPromptSchema: PromptSchema = {
},
stream: {
type: "boolean",
default: true,
description: `Whether to stream back partial progress.
If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message. Example Python code.`,
},

@@ -18,12 +18,10 @@ export function getPrompt(
return aiconfig.prompts.find((prompt) => prompt._ui.id === id);
}

// TODO: This is pretty hacky. Streaming is actually part of AIConfig runtime and not necessarily part of model settings,
// let alone required to be defined by 'stream' boolean... Ideally we should treat everything as stream but this should work for now.
export function isStreamingSupported(
export function getModelSettingsStream(
prompt: ClientPrompt,
config: ClientAIConfig
): boolean {
): boolean | undefined {
const promptModelSettings =
prompt.metadata?.model && typeof prompt.metadata.model !== "string"
? prompt.metadata.model?.settings
@@ -40,8 +38,12 @@
if (promptModelName) {
const globalModelSettings =
config.metadata?.models?.[promptModelName]?.settings;
return globalModelSettings?.stream === true;
if (globalModelSettings?.stream === true) {
return true;
} else if (promptModelSettings?.stream === false) {
return false;
}
}

return false;
return undefined;
}
44 changes: 10 additions & 34 deletions python/src/aiconfig/editor/server/server.py
@@ -202,7 +202,7 @@ def run() -> FlaskResponse:
# aiconfig.get_parameters(prompt_name) directly inside of run. See:
# https://github.com/lastmile-ai/aiconfig/issues/671
params = request_json.get("params", aiconfig.get_parameters(prompt_name)) # type: ignore
stream = request_json.get("stream", False) # TODO: set this automatically to True after client supports stream output
stream = request_json.get("stream", True)

# Define stream callback and queue object for streaming results
output_text_queue = QueueIterator()
@@ -291,40 +291,16 @@ def run_async_config_in_thread():
yield "]"

try:
if stream:
LOGGER.info(f"Running `aiconfig.run()` command with request: {request_json}")
# Streaming based on
# https://stackoverflow.com/questions/73275517/flask-not-streaming-json-response
return Response(
stream_with_context(generate()),
status=200,
content_type="application/json",
)

# Run without streaming
inference_options = InferenceOptions(stream=stream)
def run_async_config_in_thread():
asyncio.run(
aiconfig.run(
prompt_name=prompt_name,
params=params,
run_with_dependencies=False,
options=inference_options,
)
)
output_text_queue.put(STOP_STREAMING_SIGNAL)

t = threading.Thread(target=run_async_config_in_thread)
t.start()
LOGGER.info(f"Running `aiconfig.run()` command with request: {request_json}")
t.join()
return HttpResponseWithAIConfig(
#
message="Ran prompt",
code=200,
aiconfig=aiconfig,
).to_flask_format()

# Note: We run the streaming API even for non-streaming runs so that
# we can unify the way we process data on the client
# Streaming based on
# https://stackoverflow.com/questions/73275517/flask-not-streaming-json-response
return Response(
stream_with_context(generate()),
status=200,
content_type="application/json",
)
except Exception as e:
return HttpResponseWithAIConfig(
#