Make all outputs in "streaming" format
Doing this to make it easier for Ryan to parse a single, unified output format, regardless of whether the model parser actually supports streaming.

- moved `runPromptStream` -> `runPrompt`, overriding the old definition of `runPrompt`; the unified callback now takes an `enableStreaming` flag (see the sketch after this list)
- still relying on the `isStreamingSupported()` helper (renamed to `getModelSettingsStream()` in this diff) to set the `enableStreaming` param
- defaulted the `stream` param to `True` on the backend server (this has no effect for non-streaming models like Dall-E)
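
For reference, here is a minimal sketch (not the actual implementation) of the unified callback shape on the client. The types are simplified stand-ins, the `/api/run_prompt` path is a placeholder for `ROUTE_TABLE.RUN_PROMPT`, and plain `fetch` stands in for the editor's `streamingApi` helper:

```typescript
// Sketch only: simplified stand-ins for the editor's real types.
type AIConfig = Record<string, unknown>;

type RunPromptStreamEvent =
  | { type: "output_chunk"; data: unknown }
  | { type: "aiconfig"; data: AIConfig };

type RunPromptStreamCallback = (event: RunPromptStreamEvent) => void;

// One callback for every run: the request always goes through the streaming
// endpoint, and `enableStreaming` only controls the `stream` flag in the body.
async function runPrompt(
  promptName: string,
  onStream: RunPromptStreamCallback,
  enableStreaming: boolean = true
): Promise<{ aiconfig?: AIConfig }> {
  const response = await fetch("/api/run_prompt", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ prompt_name: promptName, stream: enableStreaming }),
  });
  // Placeholder: the real client parses the streamed "output_chunk" events and
  // forwards each one to `onStream`; here we only hand back the final config.
  const result = (await response.json()) as { aiconfig?: AIConfig };
  if (result.aiconfig) {
    onStream({ type: "aiconfig", data: result.aiconfig });
  }
  return result;
}
```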

## Test Plan
Test that both streaming and non-streaming settings work as expected, and that a model which does not support streaming (e.g. Dall-E) still works. Now, when the user hasn't explicitly clicked the "stream" setting, it defaults to streaming; however, if they explicitly toggle it off, streaming turns off. As a follow-up, we should have the "stream" button auto-enabled to reflect this (it doesn't have to actually be in the config; the UI should just show it as on by default to match user expectations).
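
In other words, the effective behavior is a simple tri-state. A hypothetical helper (not code in this diff) to illustrate:

```typescript
// Hypothetical helper illustrating the tri-state behavior described above.
// `modelSettingStream` is what the client reads from the prompt/model settings.
function effectiveStream(modelSettingStream: boolean | undefined): boolean {
  // Mirrors request_json.get("stream", True) on the server.
  const SERVER_DEFAULT_STREAM = true;
  return modelSettingStream ?? SERVER_DEFAULT_STREAM;
}

console.log(effectiveStream(undefined)); // true  - user never touched the toggle
console.log(effectiveStream(true));      // true  - user explicitly turned streaming on
console.log(effectiveStream(false));     // false - user explicitly turned streaming off
```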

Update: the default value for `stream` is now `true` inside `OpenAIChatModelParserPromptSchema`, `HuggingFaceTextGenerationParserPromptSchema`, and `AnyscaleEndpointPromptSchema`. Couldn't see it defined in `PaLMTextParserPromptSchema`.
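
Roughly, the `stream` field in those schemas now carries a default along these lines (simplified excerpt, not the full schema):

```typescript
// Simplified excerpt of the updated schema shape (not the full schema).
export const streamField = {
  stream: {
    type: "boolean",
    default: true, // newly added so the UI can treat streaming as on by default
  },
} as const;
```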


https://github.com/lastmile-ai/aiconfig/assets/151060367/34214a66-0cea-4774-a917-9476359f172c
Rossdan Craig rossdan@lastmileai.dev committed Jan 8, 2024
1 parent 54de9cc commit c08f51b
Showing 7 changed files with 51 additions and 68 deletions.
23 changes: 11 additions & 12 deletions python/src/aiconfig/editor/client/src/Editor.tsx
@@ -68,21 +68,22 @@ export default function Editor() {
});
}, []);

const runPrompt = useCallback(async (promptName: string) => {
return await ufetch.post(ROUTE_TABLE.RUN_PROMPT, {
prompt_name: promptName,
});
}, []);

const runPromptStream = useCallback(
async (promptName: string, onStream: RunPromptStreamCallback) => {
await streamingApi(
const runPrompt = useCallback(
async (
promptName: string,
onStream: RunPromptStreamCallback,
enableStreaming: boolean = true
) => {
// Note: We run the streaming API even for
// non-streaming runs so that we can unify
// the way we process data on the client
return await streamingApi<{ aiconfig?: AIConfig }>(
{
url: ROUTE_TABLE.RUN_PROMPT,
method: "POST",
body: {
prompt_name: promptName,
stream: true,
stream: enableStreaming,
},
},
"output_chunk",
@@ -156,7 +157,6 @@ export default function Editor() {
getModels,
getServerStatus,
runPrompt,
runPromptStream,
save,
setConfigDescription,
setConfigName,
@@ -170,7 +170,6 @@
getModels,
getServerStatus,
runPrompt,
runPromptStream,
save,
setConfigDescription,
setConfigName,

@@ -36,10 +36,10 @@ import {
import AddPromptButton from "./prompt/AddPromptButton";
import {
getDefaultNewPromptName,
getModelSettingsStream,
getPrompt,
isStreamingSupported,
} from "../utils/aiconfigStateUtils";
import { debounce, uniqueId } from "lodash";
import { FieldWithPossiblyUndefined, debounce, uniqueId } from "lodash";
import PromptMenuButton from "./prompt/PromptMenuButton";
import GlobalParametersContainer from "./GlobalParametersContainer";
import AIConfigContext from "./AIConfigContext";
@@ -79,11 +79,11 @@ export type AIConfigCallbacks = {
deletePrompt: (promptName: string) => Promise<void>;
getModels: (search: string) => Promise<string[]>;
getServerStatus?: () => Promise<{ status: "OK" | "ERROR" }>;
runPrompt: (promptName: string) => Promise<{ aiconfig: AIConfig }>;
runPromptStream: (
runPrompt: (
promptName: string,
onStream: RunPromptStreamCallback
) => Promise<void>;
onStream: RunPromptStreamCallback,
enableStreaming?: boolean
) => Promise<{ aiconfig?: AIConfig }>;
save: (aiconfig: AIConfig) => Promise<void>;
setConfigDescription: (description: string) => Promise<void>;
setConfigName: (name: string) => Promise<void>;
@@ -544,7 +544,6 @@ export default function EditorContainer({
);

const runPromptCallback = callbacks.runPrompt;
const runPromptStreamCallback = callbacks.runPromptStream;

const onRunPrompt = useCallback(
async (promptId: string) => {
@@ -562,10 +561,14 @@
}

const promptName = statePrompt.name;
const isStream = isStreamingSupported(statePrompt, stateRef.current);
const enableStreaming: boolean | undefined = getModelSettingsStream(
statePrompt!,
stateRef.current
);

if (isStream) {
await runPromptStreamCallback(promptName, (event) => {
const serverConfigResponse = await runPromptCallback(
promptName,
(event) => {
if (event.type === "output_chunk") {
dispatch({
type: "STREAM_OUTPUT_CHUNK",
@@ -579,15 +582,15 @@
config: event.data,
});
}
});
return;
} else {
const serverConfigRes = await runPromptCallback(promptName);
},
enableStreaming
);

if (serverConfigResponse.aiconfig) {
dispatch({
type: "CONSOLIDATE_AICONFIG",
action,
config: serverConfigRes.aiconfig,
config: serverConfigResponse.aiconfig,
});
}
} catch (err: unknown) {
@@ -609,7 +612,7 @@
});
}
},
[runPromptCallback, runPromptStreamCallback]
[runPromptCallback]
);

const setNameCallback = callbacks.setConfigName;

@@ -40,6 +40,7 @@ export const AnyscaleEndpointPromptSchema: PromptSchema = {
},
stream: {
type: "boolean",
default: true,
description: `If true, send messages token by token. If false, messages send in bulk.`,
},
temperature: {

@@ -32,6 +32,7 @@ export const HuggingFaceTextGenerationParserPromptSchema: PromptSchema = {
},
stream: {
type: "boolean",
default: true,
},
do_sample: {
type: "boolean",

@@ -104,6 +104,7 @@ export const OpenAIChatModelParserPromptSchema: PromptSchema = {
},
stream: {
type: "boolean",
default: true,
description: `Whether to stream back partial progress.
If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message. Example Python code.`,
},

@@ -18,12 +18,10 @@ export function getPrompt(
return aiconfig.prompts.find((prompt) => prompt._ui.id === id);
}

// TODO: This is pretty hacky. Streaming is actually part of AIConfig runtime and not necessarily part of model settings,
// let alone required to be defined by 'stream' boolean... Ideally we should treat everything as stream but this should work for now.
export function isStreamingSupported(
export function getModelSettingsStream(
prompt: ClientPrompt,
config: ClientAIConfig
): boolean {
): boolean | undefined {
const promptModelSettings =
prompt.metadata?.model && typeof prompt.metadata.model !== "string"
? prompt.metadata.model?.settings
@@ -40,8 +38,12 @@
if (promptModelName) {
const globalModelSettings =
config.metadata?.models?.[promptModelName]?.settings;
return globalModelSettings?.stream === true;
if (globalModelSettings?.stream === true) {
return true;
} else if (promptModelSettings?.stream === false) {
return false;
}
}

return false;
return undefined;
}
44 changes: 10 additions & 34 deletions python/src/aiconfig/editor/server/server.py
@@ -202,7 +202,7 @@ def run() -> FlaskResponse:
# aiconfig.get_parameters(prompt_name) directly inside of run. See:
# https://github.com/lastmile-ai/aiconfig/issues/671
params = request_json.get("params", aiconfig.get_parameters(prompt_name)) # type: ignore
stream = request_json.get("stream", False) # TODO: set this automatically to True after client supports stream output
stream = request_json.get("stream", True)

# Define stream callback and queue object for streaming results
output_text_queue = QueueIterator()
@@ -291,40 +291,16 @@ def run_async_config_in_thread():
yield "]"

try:
if stream:
LOGGER.info(f"Running `aiconfig.run()` command with request: {request_json}")
# Streaming based on
# https://stackoverflow.com/questions/73275517/flask-not-streaming-json-response
return Response(
stream_with_context(generate()),
status=200,
content_type="application/json",
)

# Run without streaming
inference_options = InferenceOptions(stream=stream)
def run_async_config_in_thread():
asyncio.run(
aiconfig.run(
prompt_name=prompt_name,
params=params,
run_with_dependencies=False,
options=inference_options,
)
)
output_text_queue.put(STOP_STREAMING_SIGNAL)

t = threading.Thread(target=run_async_config_in_thread)
t.start()
LOGGER.info(f"Running `aiconfig.run()` command with request: {request_json}")
t.join()
return HttpResponseWithAIConfig(
#
message="Ran prompt",
code=200,
aiconfig=aiconfig,
).to_flask_format()

# Note: We run the streaming API even for non-streaming runs so that
# we can unify the way we process data on the client
# Streaming based on
# https://stackoverflow.com/questions/73275517/flask-not-streaming-json-response
return Response(
stream_with_context(generate()),
status=200,
content_type="application/json",
)
except Exception as e:
return HttpResponseWithAIConfig(
#