Skip to content

Commit

Permalink
Multimodal patches (#971)
Browse files Browse the repository at this point in the history
- Fix a bug where setting treatUrlsAndPointersAsMedia while leaving treatUrlsAndPointersAsImages unset caused Marqo to return an error stating that treatUrlsAndPointersAsImages cannot be False when treatUrlsAndPointersAsMedia is True
- Add new video-audio model LanguageBind/Video_V1.5_FT_Audio_FT to the model registry.
- Move languagebind tests from CPU to CUDA tests
- Change audioPreprocessing chunk length from 20 to 10
  • Loading branch information
RaynorChavez authored Sep 18, 2024
1 parent f6f7d01 commit a0084a8
Show file tree
Hide file tree
Showing 16 changed files with 55 additions and 31 deletions.
1 change: 0 additions & 1 deletion .github/workflows/unit_test_200gb_CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ jobs:
export VESPA_CONFIG_URL=http://localhost:19071
export VESPA_DOCUMENT_URL=http://localhost:8080
export VESPA_QUERY_URL=http://localhost:8080
export MARQO_MAX_CPU_MODEL_MEMORY=15
cd marqo
export PYTHONPATH="./tests:./src:."
Expand Down
7 changes: 7 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# Release 2.12.1

## Bug fixes and minor changes
- Fix a bug where setting `treatUrlsAndPointersAsMedia` while leaving `treatUrlsAndPointersAsImages` unset caused Marqo to return an error stating that `treatUrlsAndPointersAsImages` cannot be `False` when `treatUrlsAndPointersAsMedia` is `True`
- Add new video-audio model `LanguageBind/Video_V1.5_FT_Audio_FT` to the model registry.


# Release 2.12.0

## New features
Expand Down
1 change: 1 addition & 0 deletions requirements.dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ kazoo==2.10.0
pycurl==7.45.3
certifi==2019.11.28
transformers==4.41.2
huggingface-hub==0.25.0

# s2_inference:
more_itertools
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ cachetools==5.3.1
pynvml==11.5.0 # For cuda utilization
readerwriterlock==1.0.9
kazoo==2.10.0
pycurl==7.45.3
pycurl==7.45.3
huggingface-hub==0.25.0
2 changes: 1 addition & 1 deletion src/marqo/s2_inference/model_downloading/from_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Optional
from huggingface_hub import hf_hub_download
from marqo.s2_inference.logger import get_logger
from huggingface_hub.utils._errors import RepositoryNotFoundError
from huggingface_hub.errors import RepositoryNotFoundError
from marqo.s2_inference.errors import ModelDownloadError

logger = get_logger(__name__)
Expand Down
20 changes: 15 additions & 5 deletions src/marqo/s2_inference/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1995,7 +1995,17 @@ def _get_languagebind_properties() -> Dict:
"model_size": 8,
"supported_modalities": ["video", "audio", "language", "image"],
"video_chunk_length": 20,
"audio_chunk_length": 20,
"audio_chunk_length": 10,
},
'LanguageBind/Video_V1.5_FT_Audio_FT': {
"name": "LanguageBind/Video_V1.5_FT_Audio_FT",
"dimensions": 768,
"type": "languagebind",
"loader": "languagebind",
"model_size": 5,
"supported_modalities": ["video", "audio", "language"],
"video_chunk_length": 20,
"audio_chunk_length": 10,
},
'LanguageBind/Video_V1.5_FT_Image': {
"name": "LanguageBind/Video_V1.5_FT_Image",
Expand All @@ -2005,7 +2015,7 @@ def _get_languagebind_properties() -> Dict:
"model_size": 5,
"supported_modalities": ["video", "language", "image"],
"video_chunk_length": 20,
"audio_chunk_length": 20,
"audio_chunk_length": 10,
},
'LanguageBind/Audio_FT_Image': {
"name": "LanguageBind/Audio_FT_Image",
Expand All @@ -2015,7 +2025,7 @@ def _get_languagebind_properties() -> Dict:
"model_size": 5,
"supported_modalities": ["audio", "language", "image"],
"video_chunk_length": 20,
"audio_chunk_length": 20,
"audio_chunk_length": 10,
},
'LanguageBind/Audio_FT': {
"name": "LanguageBind/Audio_FT",
Expand All @@ -2025,7 +2035,7 @@ def _get_languagebind_properties() -> Dict:
"model_size": 2,
"supported_modalities": ["video", "language"],
"video_chunk_length": 20,
"audio_chunk_length": 20,
"audio_chunk_length": 10,
},
'LanguageBind/Video_V1.5_FT': {
"name": "LanguageBind/Video_V1.5_FT",
Expand All @@ -2035,7 +2045,7 @@ def _get_languagebind_properties() -> Dict:
"model_size": 2,
"supported_modalities": ["video", "language"],
"video_chunk_length": 20,
"audio_chunk_length": 20,
"audio_chunk_length": 10,
},

}
Expand Down
5 changes: 5 additions & 0 deletions src/marqo/s2_inference/multimodal_model_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ def _load_languagebind_model(self):
'audio': 'LanguageBind_Audio_FT',
'image': 'LanguageBind_Image',
}
elif self.model_name == "LanguageBind/Video_V1.5_FT_Audio_FT":
self.clip_type = {
'video': 'LanguageBind_Video_V1.5_FT',
'audio': 'LanguageBind_Audio_FT',
}
elif self.model_name == "LanguageBind/Video_V1.5_FT_Image":
self.clip_type = {
'video': 'LanguageBind_Video_V1.5_FT',
Expand Down
3 changes: 2 additions & 1 deletion src/marqo/s2_inference/s2_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,8 @@ def _check_memory_threshold_for_model(device: str, model_size: Union[float, int]
raise ModelCacheManagementError(
f"You are trying to load a model with size = `{model_size}` into device = `{device}`, which is larger than the device threshold = `{threshold}`. "
f"Marqo CANNOT find enough space for the model. Please change the threshold by adjusting the environment variables.\n"
f"You can find more detailed information at `https://docs.marqo.ai/0.0.21/Advanced-Usage/configuration/`.")
f"Please modify the threshold by setting the environment variable `MARQO_MAX_CUDA_MODEL_MEMORY` or `MARQO_MAX_CPU_MODEL_MEMORY`."
f"You can find more detailed information at `https://docs.marqo.ai/latest/other-resources/guides/advanced-usage/configuration/`.")
return (used_memory + model_size) < threshold


Expand Down
14 changes: 6 additions & 8 deletions src/marqo/tensor_search/models/index_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class IndexSettings(StrictBaseModel):
splitOverlap=3,
)
audioPreprocessing: core.AudioPreProcessing = core.AudioPreProcessing(
splitLength=20,
splitLength=10,
splitOverlap=3,
)
vectorNumericType: core.VectorNumericType = core.VectorNumericType.Float
Expand All @@ -58,17 +58,12 @@ def validate_url_pointer_treatment(cls, values):
treat_as_images = values.get('treatUrlsAndPointersAsImages')
treat_as_media = values.get('treatUrlsAndPointersAsMedia')

if treat_as_images is None:
treat_as_images = False
if treat_as_media is None:
treat_as_media = False

if treat_as_images and not treat_as_media:
# Deprecation warning
import warnings
warnings.warn("'treatUrlsAndPointersAsImages' is deprecated. Use 'treatUrlsAndPointersAsMedia' instead.", DeprecationWarning)

if not treat_as_images and treat_as_media:
if treat_as_images == False and treat_as_media:
raise api_exceptions.InvalidArgError(
"Invalid combination: 'treatUrlsAndPointersAsImages' cannot be False when 'treatUrlsAndPointersAsMedia' is True."
)
Expand Down Expand Up @@ -161,7 +156,10 @@ def to_marqo_index_request(self, index_name: str) -> MarqoIndexRequest:
if self.treatUrlsAndPointersAsImages is None:
# Default value for treat_urls_and_pointers_as_images is False, but we can't set it in the model
# as it is not a valid parameter for structured indexes
self.treatUrlsAndPointersAsImages = False
if self.treatUrlsAndPointersAsMedia is True:
self.treatUrlsAndPointersAsImages = True
else:
self.treatUrlsAndPointersAsImages = False

if self.treatUrlsAndPointersAsMedia is None:
# Default value for treat_urls_and_pointers_as_media is False, but we can't set it in the model
Expand Down
2 changes: 1 addition & 1 deletion src/marqo/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "2.12.0"
__version__ = "2.12.1"

def get_version() -> str:
return f"{__version__}"
10 changes: 4 additions & 6 deletions tests/core/index_management/test_get_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def test_default_settings(self):
'textPreprocessing': {'splitLength': 2,
'splitMethod': TextSplitMethod.Sentence,
'splitOverlap': 0},
'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3},
'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'treatUrlsAndPointersAsImages': False,
'treatUrlsAndPointersAsMedia': False,
Expand Down Expand Up @@ -136,7 +136,7 @@ def test_default_settings(self):
'splitMethod': TextSplitMethod.Sentence,
'splitOverlap': 0
},
'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3},
'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'type': IndexType.Structured,
'vectorNumericType': VectorNumericType.Float
Expand Down Expand Up @@ -165,7 +165,7 @@ def test_custom_settings(self):
'textPreprocessing': {'splitLength': 3,
'splitMethod': TextSplitMethod.Word,
'splitOverlap': 1},
'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3},
'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'treatUrlsAndPointersAsImages': False,
'treatUrlsAndPointersAsMedia': False,
Expand All @@ -175,7 +175,6 @@ def test_custom_settings(self):
# Get unstructured custom settings
retrieved_index = self.config.index_management.get_index(self.unstructured_custom_index.name)
retrieved_settings = IndexSettings.from_marqo_index(retrieved_index).dict(exclude_none=True, by_alias=True)
print(f"retrieved_settings: {retrieved_settings}")
self.assertEqual(retrieved_settings, expected_unstructured_custom_settings)

with self.subTest("Structured index custom settings"):
Expand Down Expand Up @@ -206,14 +205,13 @@ def test_custom_settings(self):
'splitMethod': TextSplitMethod.Word,
'splitOverlap': 1
},
'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3},
'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'type': IndexType.Structured,
'vectorNumericType': VectorNumericType.Float
}
# Get structured custom settings
retrieved_index = self.config.index_management.get_index(self.structured_custom_index.name)
retrieved_settings = IndexSettings.from_marqo_index(retrieved_index).dict(exclude_none=True, by_alias=True)
print(f"retrieved_settings: {retrieved_settings}")
self.assertEqual(retrieved_settings, expected_structured_custom_settings)

2 changes: 1 addition & 1 deletion tests/s2_inference/model_downloading/test_from_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from marqo.s2_inference.errors import ModelDownloadError
from marqo.tensor_search.models.external_apis.hf import HfAuth, HfModelLocation
from marqo.s2_inference.model_downloading.from_hf import download_model_from_hf
from huggingface_hub.utils._errors import RepositoryNotFoundError
from huggingface_hub.errors import RepositoryNotFoundError
from marqo.s2_inference.configs import ModelCache


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ def test_add_document_callVectoriseWithoutPassingEnableCache(self):
"vectorise for add_documents")
mock_vectorise.reset_mock()

@pytest.mark.skipif(torch.cuda.is_available() is True, reason="GPU testing device needs to be investigated")
@pytest.mark.largemodel
@pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support")
def test_add_multimodal_single_documents(self):
""" """
documents = [
Expand Down Expand Up @@ -240,7 +241,8 @@ def test_add_multimodal_single_documents(self):
self.assertNotIn(embedding, embeddings, f"Duplicate embedding found in document {i}")
embeddings.append(embedding)

@pytest.mark.skipif(torch.cuda.is_available() is True, reason="GPU testing device needs to be investigated")
@pytest.mark.largemodel
@pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support")
def test_add_multimodal_field_document(self):
multimodal_document = {
"_id": "1_multimodal",
Expand Down
3 changes: 2 additions & 1 deletion tests/tensor_search/integ_tests/test_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,8 @@ def test_embed_image_url_as_image_not_text(self):
msg=f"Mismatch at index {i} for {index.type}")


@pytest.mark.skipif(torch.cuda.is_available() is True, reason="Skip this test if we have cuda support.")
@pytest.mark.largemodel
@pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support")
def test_embed_languagebind(self):
content = [
#TestImageUrls.HIPPO_REALISTIC.value, # image
Expand Down
6 changes: 4 additions & 2 deletions tests/tensor_search/integ_tests/test_search_combined.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ def tearDown(self) -> None:
super().tearDown()
self.device_patcher.stop()

@pytest.mark.skipif(torch.cuda.is_available() is True, reason="We skip this test if we have cuda support. This model is 5gb and is very slow on g4dn.xlarge and may crash it")
@pytest.mark.largemodel
@pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support")
def test_search_video(self):
documents = [
{"video_field_1": "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4", "_id": "1"},
Expand Down Expand Up @@ -232,7 +233,8 @@ def test_search_video(self):
self.assertEqual(results['hits'][0]['_id'], "1") # The video document should be the top result
self.assertGreater(results['hits'][0]['_score'], results['hits'][1]['_score']) # Video should have higher score

@pytest.mark.skipif(torch.cuda.is_available() is True, reason="We skip this test if we have cuda support. This model is 5gb and is very slow on g4dn.xlarge and may crash it")
@pytest.mark.largemodel
@pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support")
def test_search_audio(self):
documents = [
{"video_field_1": "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4", "_id": "1"},
Expand Down
1 change: 0 additions & 1 deletion tests/test_documentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from marqo import marqo_docs

@pytest.mark.skip(reason="skipping since we have a CI pipeline for this")
class TestDocumentation(unittest.TestCase):
def test_urls(self):
# Retrieve all public functions in the module
Expand Down

0 comments on commit a0084a8

Please sign in to comment.