11import base64
2+ import os
23from functools import lru_cache
34from io import BytesIO
45from typing import Any , List , Optional , Tuple , TypeVar , Union
1819cached_get_tokenizer = lru_cache (get_tokenizer )
1920
2021
21- def _load_image_from_bytes (b : bytes ):
def _load_image_from_bytes(b: bytes) -> Image.Image:
    """Decode encoded image bytes into a PIL image with its data loaded."""
    buffer = BytesIO(b)
    decoded = Image.open(buffer)
    # Force the pixel data to be read now rather than on first access.
    decoded.load()
    return decoded
2526
2627
27- def _load_image_from_data_url (image_url : str ):
def _is_subpath(image_path: str, allowed_local_media_path: str) -> bool:
    """
    Return True if ``image_path`` is located inside
    ``allowed_local_media_path`` (or is that directory itself).

    Both paths are canonicalized with ``os.path.realpath`` so that ``..``
    segments and symbolic links are resolved before the comparison;
    otherwise a symlink placed inside the allowed directory could point
    at a file outside of it and still pass the check.

    Args:
        image_path: Path of the file being requested.
        allowed_local_media_path: Directory that files must live under.

    Returns:
        True when the resolved ``image_path`` is within the resolved
        ``allowed_local_media_path``, False otherwise.
    """
    allowed_root = os.path.realpath(allowed_local_media_path)
    candidate = os.path.realpath(image_path)
    try:
        common_path = os.path.commonpath([candidate, allowed_root])
    except ValueError:
        # commonpath raises when the paths cannot share a prefix at all
        # (e.g. different drives on Windows): not a subpath.
        return False
    # The candidate is contained only if the shared prefix is the whole root.
    return common_path == allowed_root
36+
37+
def _load_image_from_file(image_url: str,
                          allowed_local_media_path: str) -> Image.Image:
    """
    Load a PIL image from a ``file://`` URL, restricted to one directory.

    Args:
        image_url: URL of the form ``file://<path>``.
        allowed_local_media_path: Directory that local files must live
            under. An empty value means local file loading is disabled.

    Returns:
        The image, with its data already loaded.

    Raises:
        ValueError: If local loading is disabled, if the allowed path does
            not exist or is not a directory, if ``image_url`` does not
            contain ``file://``, or if the requested file resolves to a
            location outside the allowed directory.
    """
    if not allowed_local_media_path:
        raise ValueError("Invalid 'image_url': Cannot load local files without "
                         "'--allowed-local-media-path'.")
    if not os.path.exists(allowed_local_media_path):
        raise ValueError(
            "Invalid '--allowed-local-media-path': "
            f"The path {allowed_local_media_path} does not exist.")
    if not os.path.isdir(allowed_local_media_path):
        raise ValueError(
            "Invalid '--allowed-local-media-path': "
            f"The path {allowed_local_media_path} must be a directory.")

    # Only split once and assume the second part is the image path
    _, image_path = image_url.split("file://", 1)

    # Resolve symlinks and '..' segments before the containment check, and
    # open the resolved path, so a link inside the allowed directory cannot
    # be used to read a file that actually lives elsewhere.
    resolved_path = os.path.realpath(image_path)
    resolved_root = os.path.realpath(allowed_local_media_path)
    if not _is_subpath(resolved_path, resolved_root):
        raise ValueError(
            f"Invalid 'image_url': The file path {image_path} must"
            " be a subpath of '--allowed-local-media-path'"
            f" '{allowed_local_media_path}'.")

    image = Image.open(resolved_path)
    image.load()
    return image
64+
65+
def _load_image_from_data_url(image_url: str) -> Image.Image:
    """Load an image from a data URL by decoding the part after the comma."""
    # Split at the first comma only: everything after it is treated as the
    # base64 payload, everything before it is discarded.
    _header, encoded_payload = image_url.split(",", 1)
    image = load_image_from_base64(encoded_payload)
    return image
3170
3271
33- def fetch_image (image_url : str , * , image_mode : str = "RGB" ) -> Image .Image :
72+ def fetch_image (image_url : str ,
73+ * ,
74+ image_mode : str = "RGB" ,
75+ allowed_local_media_path : str = "" ) -> Image .Image :
3476 """
3577 Load a PIL image from a HTTP or base64 data URL.
3678
@@ -43,16 +85,19 @@ def fetch_image(image_url: str, *, image_mode: str = "RGB") -> Image.Image:
4385
4486 elif image_url .startswith ('data:image' ):
4587 image = _load_image_from_data_url (image_url )
88+ elif image_url .startswith ('file://' ):
89+ image = _load_image_from_file (image_url , allowed_local_media_path )
4690 else :
4791 raise ValueError ("Invalid 'image_url': A valid 'image_url' must start "
48- "with either 'data:image' or 'http'." )
92+ "with either 'data:image', 'file://' or 'http'." )
4993
5094 return image .convert (image_mode )
5195
5296
5397async def async_fetch_image (image_url : str ,
5498 * ,
55- image_mode : str = "RGB" ) -> Image .Image :
99+ image_mode : str = "RGB" ,
100+ allowed_local_media_path : str = "" ) -> Image .Image :
56101 """
57102 Asynchronously load a PIL image from a HTTP or base64 data URL.
58103
@@ -65,9 +110,11 @@ async def async_fetch_image(image_url: str,
65110
66111 elif image_url .startswith ('data:image' ):
67112 image = _load_image_from_data_url (image_url )
113+ elif image_url .startswith ('file://' ):
114+ image = _load_image_from_file (image_url , allowed_local_media_path )
68115 else :
69116 raise ValueError ("Invalid 'image_url': A valid 'image_url' must start "
70- "with either 'data:image' or 'http'." )
117+ "with either 'data:image', 'file://' or 'http'." )
71118
72119 return image .convert (image_mode )
73120
@@ -126,8 +173,12 @@ def get_and_parse_audio(audio_url: str) -> MultiModalDataDict:
126173 return {"audio" : (audio , sr )}
127174
128175
129- def get_and_parse_image (image_url : str ) -> MultiModalDataDict :
130- image = fetch_image (image_url )
def get_and_parse_image(
        image_url: str,
        *,
        allowed_local_media_path: str = "") -> MultiModalDataDict:
    """
    Fetch the image at ``image_url`` and wrap it under the ``"image"`` key.

    ``allowed_local_media_path`` is forwarded unchanged to ``fetch_image``.
    """
    return {
        "image":
        fetch_image(image_url,
                    allowed_local_media_path=allowed_local_media_path)
    }
132183
133184
@@ -136,8 +187,12 @@ async def async_get_and_parse_audio(audio_url: str) -> MultiModalDataDict:
136187 return {"audio" : (audio , sr )}
137188
138189
139- async def async_get_and_parse_image (image_url : str ) -> MultiModalDataDict :
140- image = await async_fetch_image (image_url )
async def async_get_and_parse_image(
        image_url: str,
        *,
        allowed_local_media_path: str = "") -> MultiModalDataDict:
    """
    Asynchronously fetch the image at ``image_url`` and wrap it under the
    ``"image"`` key.

    ``allowed_local_media_path`` is forwarded unchanged to
    ``async_fetch_image``.
    """
    fetched = await async_fetch_image(
        image_url, allowed_local_media_path=allowed_local_media_path)
    return {"image": fetched}
142197
143198
0 commit comments