From a0084a86d5cf797616a1f8e185eba87417edbc15 Mon Sep 17 00:00:00 2001 From: Raynor Chavez Date: Wed, 18 Sep 2024 14:19:51 +0800 Subject: [PATCH] Multimodal patches (#971) - Fix a bug where when treatUrlsAndPointersAsImages is unset and treatUrlsAndPointersAsMedia is set, Marqo returns an error where treatUrlsAndPointersAsImages cannot be False when treatUrlsAndPointersAsMedia is True - Add new video-audio model LanguageBind/Video_V1.5_FT_Audio_FT to the model registry. - Move languagebind tests from CPU to CUDA tests - Change audioPreprocessing chunk length from 20 to 10 --- .github/workflows/unit_test_200gb_CI.yml | 1 - RELEASE.md | 7 +++++++ requirements.dev.txt | 1 + requirements.txt | 3 ++- .../s2_inference/model_downloading/from_hf.py | 2 +- src/marqo/s2_inference/model_registry.py | 20 ++++++++++++++----- .../s2_inference/multimodal_model_load.py | 5 +++++ src/marqo/s2_inference/s2_inference.py | 3 ++- .../tensor_search/models/index_settings.py | 14 ++++++------- src/marqo/version.py | 2 +- .../index_management/test_get_settings.py | 10 ++++------ .../model_downloading/test_from_hf.py | 2 +- .../test_add_documents_combined.py | 6 ++++-- tests/tensor_search/integ_tests/test_embed.py | 3 ++- .../integ_tests/test_search_combined.py | 6 ++++-- tests/test_documentation.py | 1 - 16 files changed, 55 insertions(+), 31 deletions(-) diff --git a/.github/workflows/unit_test_200gb_CI.yml b/.github/workflows/unit_test_200gb_CI.yml index 2a3857697..3b5958fb6 100644 --- a/.github/workflows/unit_test_200gb_CI.yml +++ b/.github/workflows/unit_test_200gb_CI.yml @@ -136,7 +136,6 @@ jobs: export VESPA_CONFIG_URL=http://localhost:19071 export VESPA_DOCUMENT_URL=http://localhost:8080 export VESPA_QUERY_URL=http://localhost:8080 - export MARQO_MAX_CPU_MODEL_MEMORY=15 cd marqo export PYTHONPATH="./tests:./src:." diff --git a/RELEASE.md b/RELEASE.md index 345c17930..be468ee18 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,10 @@ +# Release 2.12.1 + +## Bug fixes and minor changes +- Fix a bug where when `treatUrlsAndPointersAsImages` is unset and `treatUrlsAndPointersAsMedia` is set, Marqo returns an error where `treatUrlsAndPointersAsImages` cannot be `False` when `treatUrlsAndPointersAsMedia` is `True` +- Add new video-audio model `LanguageBind/Video_V1.5_FT_Audio_FT` to the model registry. + + # Release 2.12.0 ## New features diff --git a/requirements.dev.txt b/requirements.dev.txt index 0b8f2512e..039473af2 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -17,6 +17,7 @@ kazoo==2.10.0 pycurl==7.45.3 certifi==2019.11.28 transformers==4.41.2 +huggingface-hub==0.25.0 # s2_inference: more_itertools diff --git a/requirements.txt b/requirements.txt index f8e9cf0eb..12ae5a8cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ cachetools==5.3.1 pynvml==11.5.0 # For cuda utilization readerwriterlock==1.0.9 kazoo==2.10.0 -pycurl==7.45.3 \ No newline at end of file +pycurl==7.45.3 +huggingface-hub==0.25.0 \ No newline at end of file diff --git a/src/marqo/s2_inference/model_downloading/from_hf.py b/src/marqo/s2_inference/model_downloading/from_hf.py index 97adcd2b2..1f68ec746 100644 --- a/src/marqo/s2_inference/model_downloading/from_hf.py +++ b/src/marqo/s2_inference/model_downloading/from_hf.py @@ -2,7 +2,7 @@ from typing import Optional from huggingface_hub import hf_hub_download from marqo.s2_inference.logger import get_logger -from huggingface_hub.utils._errors import RepositoryNotFoundError +from huggingface_hub.errors import RepositoryNotFoundError from marqo.s2_inference.errors import ModelDownloadError logger = get_logger(__name__) diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 2715e6413..cce092b7c 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1995,7 +1995,17 @@ def _get_languagebind_properties() -> Dict: "model_size": 8, "supported_modalities": ["video", "audio", "language", "image"], "video_chunk_length": 20, - "audio_chunk_length": 20, + "audio_chunk_length": 10, + }, + 'LanguageBind/Video_V1.5_FT_Audio_FT': { + "name": "LanguageBind/Video_V1.5_FT_Audio_FT", + "dimensions": 768, + "type": "languagebind", + "loader": "languagebind", + "model_size": 5, + "supported_modalities": ["video", "audio", "language"], + "video_chunk_length": 20, + "audio_chunk_length": 10, }, 'LanguageBind/Video_V1.5_FT_Image': { "name": "LanguageBind/Video_V1.5_FT_Image", @@ -2005,7 +2015,7 @@ def _get_languagebind_properties() -> Dict: "model_size": 5, "supported_modalities": ["video", "language", "image"], "video_chunk_length": 20, - "audio_chunk_length": 20, + "audio_chunk_length": 10, }, 'LanguageBind/Audio_FT_Image': { "name": "LanguageBind/Audio_FT_Image", @@ -2015,7 +2025,7 @@ def _get_languagebind_properties() -> Dict: "model_size": 5, "supported_modalities": ["audio", "language", "image"], "video_chunk_length": 20, - "audio_chunk_length": 20, + "audio_chunk_length": 10, }, 'LanguageBind/Audio_FT': { "name": "LanguageBind/Audio_FT", @@ -2025,7 +2035,7 @@ def _get_languagebind_properties() -> Dict: "model_size": 2, "supported_modalities": ["video", "language"], "video_chunk_length": 20, - "audio_chunk_length": 20, + "audio_chunk_length": 10, }, 'LanguageBind/Video_V1.5_FT': { "name": "LanguageBind/Video_V1.5_FT", @@ -2035,7 +2045,7 @@ def _get_languagebind_properties() -> Dict: "model_size": 2, "supported_modalities": ["video", "language"], "video_chunk_length": 20, - "audio_chunk_length": 20, + "audio_chunk_length": 10, }, } diff --git a/src/marqo/s2_inference/multimodal_model_load.py b/src/marqo/s2_inference/multimodal_model_load.py index 91da39fec..5fc9d03a4 100644 --- a/src/marqo/s2_inference/multimodal_model_load.py +++ b/src/marqo/s2_inference/multimodal_model_load.py @@ -75,6 +75,11 @@ def _load_languagebind_model(self): 'audio': 'LanguageBind_Audio_FT', 'image': 'LanguageBind_Image', } + elif self.model_name == "LanguageBind/Video_V1.5_FT_Audio_FT": + self.clip_type = { + 'video': 'LanguageBind_Video_V1.5_FT', + 'audio': 'LanguageBind_Audio_FT', + } elif self.model_name == "LanguageBind/Video_V1.5_FT_Image": self.clip_type = { 'video': 'LanguageBind_Video_V1.5_FT', diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 12ea3a89b..02d347535 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -482,7 +482,8 @@ def _check_memory_threshold_for_model(device: str, model_size: Union[float, int] raise ModelCacheManagementError( f"You are trying to load a model with size = `{model_size}` into device = `{device}`, which is larger than the device threshold = `{threshold}`. " f"Marqo CANNOT find enough space for the model. Please change the threshold by adjusting the environment variables.\n" - f"You can find more detailed information at `https://docs.marqo.ai/0.0.21/Advanced-Usage/configuration/`.") + f"Please modify the threshold by setting the environment variable `MARQO_MAX_CUDA_MODEL_MEMORY` or `MARQO_MAX_CPU_MODEL_MEMORY`." + f"You can find more detailed information at `https://docs.marqo.ai/latest/other-resources/guides/advanced-usage/configuration/`.") return (used_memory + model_size) < threshold diff --git a/src/marqo/tensor_search/models/index_settings.py b/src/marqo/tensor_search/models/index_settings.py index bfd4b824b..cd020bbb3 100644 --- a/src/marqo/tensor_search/models/index_settings.py +++ b/src/marqo/tensor_search/models/index_settings.py @@ -41,7 +41,7 @@ class IndexSettings(StrictBaseModel): splitOverlap=3, ) audioPreprocessing: core.AudioPreProcessing = core.AudioPreProcessing( - splitLength=20, + splitLength=10, splitOverlap=3, ) vectorNumericType: core.VectorNumericType = core.VectorNumericType.Float @@ -58,17 +58,12 @@ def validate_url_pointer_treatment(cls, values): treat_as_images = values.get('treatUrlsAndPointersAsImages') treat_as_media = values.get('treatUrlsAndPointersAsMedia') - if treat_as_images is None: - treat_as_images = False - if treat_as_media is None: - treat_as_media = False - if treat_as_images and not treat_as_media: # Deprecation warning import warnings warnings.warn("'treatUrlsAndPointersAsImages' is deprecated. Use 'treatUrlsAndPointersAsMedia' instead.", DeprecationWarning) - if not treat_as_images and treat_as_media: + if treat_as_images == False and treat_as_media: raise api_exceptions.InvalidArgError( "Invalid combination: 'treatUrlsAndPointersAsImages' cannot be False when 'treatUrlsAndPointersAsMedia' is True." ) @@ -161,7 +156,10 @@ def to_marqo_index_request(self, index_name: str) -> MarqoIndexRequest: if self.treatUrlsAndPointersAsImages is None: # Default value for treat_urls_and_pointers_as_images is False, but we can't set it in the model # as it is not a valid parameter for structured indexes - self.treatUrlsAndPointersAsImages = False + if self.treatUrlsAndPointersAsMedia is True: + self.treatUrlsAndPointersAsImages = True + else: + self.treatUrlsAndPointersAsImages = False if self.treatUrlsAndPointersAsMedia is None: # Default value for treat_urls_and_pointers_as_media is False, but we can't set it in the model diff --git a/src/marqo/version.py b/src/marqo/version.py index 0daf82c48..1b87bdac2 100644 --- a/src/marqo/version.py +++ b/src/marqo/version.py @@ -1,4 +1,4 @@ -__version__ = "2.12.0" +__version__ = "2.12.1" def get_version() -> str: return f"{__version__}" diff --git a/tests/core/index_management/test_get_settings.py b/tests/core/index_management/test_get_settings.py index a07496526..30f31ad5d 100644 --- a/tests/core/index_management/test_get_settings.py +++ b/tests/core/index_management/test_get_settings.py @@ -95,7 +95,7 @@ def test_default_settings(self): 'textPreprocessing': {'splitLength': 2, 'splitMethod': TextSplitMethod.Sentence, 'splitOverlap': 0}, - 'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3}, + 'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3}, 'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3}, 'treatUrlsAndPointersAsImages': False, 'treatUrlsAndPointersAsMedia': False, @@ -136,7 +136,7 @@ def test_default_settings(self): 'splitMethod': TextSplitMethod.Sentence, 'splitOverlap': 0 }, - 'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3}, + 'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3}, 'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3}, 'type': IndexType.Structured, 'vectorNumericType': VectorNumericType.Float @@ -165,7 +165,7 @@ def test_custom_settings(self): 'textPreprocessing': {'splitLength': 3, 'splitMethod': TextSplitMethod.Word, 'splitOverlap': 1}, - 'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3}, + 'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3}, 'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3}, 'treatUrlsAndPointersAsImages': False, 'treatUrlsAndPointersAsMedia': False, @@ -175,7 +175,6 @@ def test_custom_settings(self): # Get unstructured custom settings retrieved_index = self.config.index_management.get_index(self.unstructured_custom_index.name) retrieved_settings = IndexSettings.from_marqo_index(retrieved_index).dict(exclude_none=True, by_alias=True) - print(f"retrieved_settings: {retrieved_settings}") self.assertEqual(retrieved_settings, expected_unstructured_custom_settings) with self.subTest("Structured index custom settings"): @@ -206,7 +205,7 @@ def test_custom_settings(self): 'splitMethod': TextSplitMethod.Word, 'splitOverlap': 1 }, - 'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3}, + 'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3}, 'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3}, 'type': IndexType.Structured, 'vectorNumericType': VectorNumericType.Float @@ -214,6 +213,5 @@ def test_custom_settings(self): # Get unstructured default settings retrieved_index = self.config.index_management.get_index(self.structured_custom_index.name) retrieved_settings = IndexSettings.from_marqo_index(retrieved_index).dict(exclude_none=True, by_alias=True) - print(f"retrieved_settings: {retrieved_settings}") self.assertEqual(retrieved_settings, expected_structured_custom_settings) \ No newline at end of file diff --git a/tests/s2_inference/model_downloading/test_from_hf.py b/tests/s2_inference/model_downloading/test_from_hf.py index d799e6819..02e02cda5 100644 --- a/tests/s2_inference/model_downloading/test_from_hf.py +++ b/tests/s2_inference/model_downloading/test_from_hf.py @@ -3,7 +3,7 @@ from marqo.s2_inference.errors import ModelDownloadError from marqo.tensor_search.models.external_apis.hf import HfAuth, HfModelLocation from marqo.s2_inference.model_downloading.from_hf import download_model_from_hf -from huggingface_hub.utils._errors import RepositoryNotFoundError +from huggingface_hub.errors import RepositoryNotFoundError from marqo.s2_inference.configs import ModelCache diff --git a/tests/tensor_search/integ_tests/test_add_documents_combined.py b/tests/tensor_search/integ_tests/test_add_documents_combined.py index d5113a5fa..9d80e0fb1 100644 --- a/tests/tensor_search/integ_tests/test_add_documents_combined.py +++ b/tests/tensor_search/integ_tests/test_add_documents_combined.py @@ -179,7 +179,8 @@ def test_add_document_callVectoriseWithoutPassingEnableCache(self): "vectorise for add_documents") mock_vectorise.reset_mock() - @pytest.mark.skipif(torch.cuda.is_available() is True, reason="GPU testing device needs to be investigated") + @pytest.mark.largemodel + @pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support") def test_add_multimodal_single_documents(self): """ """ documents = [ @@ -240,7 +241,8 @@ def test_add_multimodal_single_documents(self): self.assertNotIn(embedding, embeddings, f"Duplicate embedding found in document {i}") embeddings.append(embedding) - @pytest.mark.skipif(torch.cuda.is_available() is True, reason="GPU testing device needs to be investigated") + @pytest.mark.largemodel + @pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support") def test_add_multimodal_field_document(self): multimodal_document = { "_id": "1_multimodal", diff --git a/tests/tensor_search/integ_tests/test_embed.py b/tests/tensor_search/integ_tests/test_embed.py index 5f94c1652..3ab73abe1 100644 --- a/tests/tensor_search/integ_tests/test_embed.py +++ b/tests/tensor_search/integ_tests/test_embed.py @@ -272,7 +272,8 @@ def test_embed_image_url_as_image_not_text(self): msg=f"Mismatch at index {i} for {index.type}") - @pytest.mark.skipif(torch.cuda.is_available() is True, reason="Skip this test if we have cuda support.") + @pytest.mark.largemodel + @pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support") def test_embed_languagebind(self): content = [ #TestImageUrls.HIPPO_REALISTIC.value, # image diff --git a/tests/tensor_search/integ_tests/test_search_combined.py b/tests/tensor_search/integ_tests/test_search_combined.py index 046bdea5d..4e2839ec0 100644 --- a/tests/tensor_search/integ_tests/test_search_combined.py +++ b/tests/tensor_search/integ_tests/test_search_combined.py @@ -198,7 +198,8 @@ def tearDown(self) -> None: super().tearDown() self.device_patcher.stop() - @pytest.mark.skipif(torch.cuda.is_available() is True, reason="We skip this test if we have cuda support. This model is 5gb and is very slow on g4dn.xlarge and may crash it") + @pytest.mark.largemodel + @pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support") def test_search_video(self): documents = [ {"video_field_1": "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4", "_id": "1"}, @@ -232,7 +233,8 @@ def test_search_video(self): self.assertEqual(results['hits'][0]['_id'], "1") # The video document should be the top result self.assertGreater(results['hits'][0]['_score'], results['hits'][1]['_score']) # Video should have higher score - @pytest.mark.skipif(torch.cuda.is_available() is True, reason="We skip this test if we have cuda support. This model is 5gb and is very slow on g4dn.xlarge and may crash it") + @pytest.mark.largemodel + @pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support") def test_search_audio(self): documents = [ {"video_field_1": "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4", "_id": "1"}, diff --git a/tests/test_documentation.py b/tests/test_documentation.py index 9767a82b7..615094205 100644 --- a/tests/test_documentation.py +++ b/tests/test_documentation.py @@ -5,7 +5,6 @@ from marqo import marqo_docs -@pytest.mark.skip(reason="skipping since we have a CI pipeline for this") class TestDocumentation(unittest.TestCase): def test_urls(self): # Retrieve all public functions in the module