instructor-ai · ivanleomk · Oct 8, 2024 · Oct 9, 2024
diff --git a/instructor/multimodal.py b/instructor/multimodal.py
@@ -75,19 +75,23 @@ def to_openai(self) -> dict[str, Any]:
 
 
 def convert_contents(
- contents: Union[list[Union[str, Image]], str, Image], # noqa: UP007
+ contents: Union[ # noqa: UP007
+ list[Union[str, dict[str, Any], Image]], str, dict[str, Any], Image # noqa: UP007
+ ],
  mode: Mode,
 ) -> Union[str, list[dict[str, Any]]]: # noqa: UP007
  """Convert content items to the appropriate format based on the specified mode."""
  if isinstance(contents, str):
  return contents
- if isinstance(contents, Image):
+ if isinstance(contents, Image) or isinstance(contents, dict):
  contents = [contents]
 
  converted_contents: list[dict[str, Union[str, Image]]] = [] # noqa: UP007
  for content in contents:
  if isinstance(content, str):
  converted_contents.append({"type": "text", "text": content})
+ elif isinstance(content, dict):
+ converted_contents.append(content)
  elif isinstance(content, Image):
  if mode in {Mode.ANTHROPIC_JSON, Mode.ANTHROPIC_TOOLS}:
  converted_contents.append(content.to_anthropic())
@@ -101,7 +105,12 @@ def convert_contents(
 
 
 def convert_messages(
- messages: list[dict[str, Union[str, list[Union[str, Image]]]]], # noqa: UP007
+ messages: list[
+ dict[
+ str,
+ Union[list[Union[str, dict[str, Any], Image]], str, dict[str, Any], Image], # noqa: UP007
+ ]
+ ], # noqa: UP007
  mode: Mode,
 ) -> list[dict[str, Any]]:
  """Convert messages to the appropriate format based on the specified mode."""

diff --git a/tests/test_multimodal.py b/tests/test_multimodal.py
@@ -180,3 +180,13 @@ def test_convert_contents_anthropic_mode():
  assert converted[1]["type"] == "image"
  assert converted[1]["source"]["type"] == "base64"
  assert converted[1]["source"]["media_type"] == "image/png"
+
+
+def test_convert_contents_custom_dict():
+    """A bare content dict is wrapped in a list and passed through unchanged."""
+    contents = {
+        "type": "image_url",
+        "image_url": {"url": "data:image/png;base64,base64_img"},  # literal, not interpolated
+    }
+    converted = list(convert_contents(contents, Mode.TOOLS))
+    assert converted == [contents]  # equality with a one-item list also pins the length