Multimodal patches #971

Merged · 14 commits · Sep 18, 2024
1 change: 0 additions & 1 deletion .github/workflows/unit_test_200gb_CI.yml
@@ -136,7 +136,6 @@ jobs:
export VESPA_CONFIG_URL=http://localhost:19071
export VESPA_DOCUMENT_URL=http://localhost:8080
export VESPA_QUERY_URL=http://localhost:8080
- export MARQO_MAX_CPU_MODEL_MEMORY=15

cd marqo
export PYTHONPATH="./tests:./src:."
7 changes: 7 additions & 0 deletions RELEASE.md
@@ -1,3 +1,10 @@
+ # Release 2.12.1
+
+ ## Bug fixes and minor changes
+ - Fix a bug where, when `treatUrlsAndPointersAsImages` is unset and `treatUrlsAndPointersAsMedia` is set, Marqo returns an error stating that `treatUrlsAndPointersAsImages` cannot be `False` when `treatUrlsAndPointersAsMedia` is `True`.
+ - Add the new video-audio model `LanguageBind/Video_V1.5_FT_Audio_FT` to the model registry.
+
+
# Release 2.12.0

## New features
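For context, a minimal sketch of the settings combination the first fix addresses, using the Python client against a local Marqo instance (the index name, URL, and model choice are illustrative):

```python
import marqo

mq = marqo.Client(url="http://localhost:8882")  # assumes a locally running Marqo instance

# Before 2.12.1 this combination raised the error quoted above, because an unset
# treatUrlsAndPointersAsImages defaulted to False while treatUrlsAndPointersAsMedia was True.
# After this patch, the unset image flag follows the media flag instead.
mq.create_index(
    "my-media-index",  # hypothetical index name
    settings_dict={
        "treatUrlsAndPointersAsMedia": True,
        # treatUrlsAndPointersAsImages deliberately left unset
        "model": "LanguageBind/Video_V1.5_FT_Audio_FT",
    },
)
```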
1 change: 1 addition & 0 deletions requirements.dev.txt
@@ -17,6 +17,7 @@ kazoo==2.10.0
pycurl==7.45.3
certifi==2019.11.28
transformers==4.41.2
+ huggingface-hub==0.25.0

# s2_inference:
more_itertools
3 changes: 2 additions & 1 deletion requirements.txt
@@ -7,4 +7,5 @@ cachetools==5.3.1
pynvml==11.5.0 # For cuda utilization
readerwriterlock==1.0.9
kazoo==2.10.0
- pycurl==7.45.3
+ pycurl==7.45.3
+ huggingface-hub==0.25.0
2 changes: 1 addition & 1 deletion src/marqo/s2_inference/model_downloading/from_hf.py
@@ -2,7 +2,7 @@
from typing import Optional
from huggingface_hub import hf_hub_download
from marqo.s2_inference.logger import get_logger
- from huggingface_hub.utils._errors import RepositoryNotFoundError
+ from huggingface_hub.errors import RepositoryNotFoundError
from marqo.s2_inference.errors import ModelDownloadError

logger = get_logger(__name__)
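The import change tracks the pinned huggingface-hub 0.25.0, where `RepositoryNotFoundError` is exposed from the public `huggingface_hub.errors` module rather than the old private `utils._errors` path. A quick sketch of the new import in use (the repo id is deliberately bogus):

```python
from huggingface_hub import hf_hub_download
from huggingface_hub.errors import RepositoryNotFoundError  # public location in huggingface-hub 0.25.x

try:
    # Points at a repository that should not exist, to exercise the error path.
    hf_hub_download(repo_id="marqo-nonexistent/no-such-repo", filename="pytorch_model.bin")
except RepositoryNotFoundError as err:
    print(f"Repository not found: {err}")
```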
20 changes: 15 additions & 5 deletions src/marqo/s2_inference/model_registry.py
@@ -1995,7 +1995,17 @@ def _get_languagebind_properties() -> Dict:
"model_size": 8,
"supported_modalities": ["video", "audio", "language", "image"],
"video_chunk_length": 20,
"audio_chunk_length": 20,
"audio_chunk_length": 10,
},
'LanguageBind/Video_V1.5_FT_Audio_FT': {
"name": "LanguageBind/Video_V1.5_FT_Audio_FT",
"dimensions": 768,
"type": "languagebind",
"loader": "languagebind",
"model_size": 5,
"supported_modalities": ["video", "audio", "language"],
"video_chunk_length": 20,
"audio_chunk_length": 10,
},
'LanguageBind/Video_V1.5_FT_Image': {
"name": "LanguageBind/Video_V1.5_FT_Image",
@@ -2005,7 +2015,7 @@ def _get_languagebind_properties() -> Dict:
"model_size": 5,
"supported_modalities": ["video", "language", "image"],
"video_chunk_length": 20,
"audio_chunk_length": 20,
"audio_chunk_length": 10,
},
'LanguageBind/Audio_FT_Image': {
"name": "LanguageBind/Audio_FT_Image",
@@ -2015,7 +2025,7 @@ def _get_languagebind_properties() -> Dict:
"model_size": 5,
"supported_modalities": ["audio", "language", "image"],
"video_chunk_length": 20,
"audio_chunk_length": 20,
"audio_chunk_length": 10,
},
'LanguageBind/Audio_FT': {
"name": "LanguageBind/Audio_FT",
@@ -2025,7 +2035,7 @@ def _get_languagebind_properties() -> Dict:
"model_size": 2,
"supported_modalities": ["video", "language"],
"video_chunk_length": 20,
"audio_chunk_length": 20,
"audio_chunk_length": 10,
},
'LanguageBind/Video_V1.5_FT': {
"name": "LanguageBind/Video_V1.5_FT",
@@ -2035,7 +2045,7 @@ def _get_languagebind_properties() -> Dict:
"model_size": 2,
"supported_modalities": ["video", "language"],
"video_chunk_length": 20,
"audio_chunk_length": 20,
"audio_chunk_length": 10,
},

}
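A small sanity check of the new registry entry, assuming the Marqo source tree is on `PYTHONPATH` (as in the CI workflow above):

```python
from marqo.s2_inference.model_registry import _get_languagebind_properties

props = _get_languagebind_properties()["LanguageBind/Video_V1.5_FT_Audio_FT"]
assert props["dimensions"] == 768
assert props["supported_modalities"] == ["video", "audio", "language"]
# Audio chunk length drops from 20 s to 10 s across the LanguageBind entries in this patch.
assert props["audio_chunk_length"] == 10
```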
5 changes: 5 additions & 0 deletions src/marqo/s2_inference/multimodal_model_load.py
@@ -75,6 +75,11 @@ def _load_languagebind_model(self):
'audio': 'LanguageBind_Audio_FT',
'image': 'LanguageBind_Image',
}
+ elif self.model_name == "LanguageBind/Video_V1.5_FT_Audio_FT":
+ self.clip_type = {
+ 'video': 'LanguageBind_Video_V1.5_FT',
+ 'audio': 'LanguageBind_Audio_FT',
+ }
elif self.model_name == "LanguageBind/Video_V1.5_FT_Image":
self.clip_type = {
'video': 'LanguageBind_Video_V1.5_FT',
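The `clip_type` dict selects which per-modality LanguageBind checkpoints back the new model name. Roughly how the upstream LanguageBind package consumes such a mapping (a sketch based on the upstream project's documented constructor, not on Marqo's internal loader):

```python
from languagebind import LanguageBind  # upstream PKU-YuanGroup/LanguageBind package

clip_type = {
    'video': 'LanguageBind_Video_V1.5_FT',
    'audio': 'LanguageBind_Audio_FT',
}
# One encoder per modality is instantiated from the mapping; weights are cached locally.
model = LanguageBind(clip_type=clip_type, cache_dir='./cache_dir')
model.eval()
```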
3 changes: 2 additions & 1 deletion src/marqo/s2_inference/s2_inference.py
@@ -482,7 +482,8 @@ def _check_memory_threshold_for_model(device: str, model_size: Union[float, int]
raise ModelCacheManagementError(
f"You are trying to load a model with size = `{model_size}` into device = `{device}`, which is larger than the device threshold = `{threshold}`. "
f"Marqo CANNOT find enough space for the model. Please change the threshold by adjusting the environment variables.\n"
f"You can find more detailed information at `https://docs.marqo.ai/0.0.21/Advanced-Usage/configuration/`.")
f"Please modify the threshold by setting the environment variable `MARQO_MAX_CUDA_MODEL_MEMORY` or `MARQO_MAX_CPU_MODEL_MEMORY`."
f"You can find more detailed information at `https://docs.marqo.ai/latest/other-resources/guides/advanced-usage/configuration/`.")
papa99do marked this conversation as resolved.
Show resolved Hide resolved
return (used_memory + model_size) < threshold


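The reworded error now names the variables that control the per-device threshold. A sketch of raising them before Marqo starts; the values are in GB, matching the `MARQO_MAX_CPU_MODEL_MEMORY=15` line removed from the CI workflow above, but treat the exact semantics as an assumption and check the configuration docs:

```python
import os

# Allow up to 15 GB of cached model weights per device before further loads are refused.
os.environ["MARQO_MAX_CPU_MODEL_MEMORY"] = "15"
os.environ["MARQO_MAX_CUDA_MODEL_MEMORY"] = "15"
```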
14 changes: 6 additions & 8 deletions src/marqo/tensor_search/models/index_settings.py
@@ -41,7 +41,7 @@ class IndexSettings(StrictBaseModel):
splitOverlap=3,
)
audioPreprocessing: core.AudioPreProcessing = core.AudioPreProcessing(
- splitLength=20,
+ splitLength=10,
splitOverlap=3,
)
vectorNumericType: core.VectorNumericType = core.VectorNumericType.Float
@@ -58,17 +58,12 @@ def validate_url_pointer_treatment(cls, values):
treat_as_images = values.get('treatUrlsAndPointersAsImages')
treat_as_media = values.get('treatUrlsAndPointersAsMedia')

- if treat_as_images is None:
- treat_as_images = False
- if treat_as_media is None:
- treat_as_media = False
-
if treat_as_images and not treat_as_media:
# Deprecation warning
import warnings
warnings.warn("'treatUrlsAndPointersAsImages' is deprecated. Use 'treatUrlsAndPointersAsMedia' instead.", DeprecationWarning)

- if not treat_as_images and treat_as_media:
+ if treat_as_images == False and treat_as_media:
raise api_exceptions.InvalidArgError(
"Invalid combination: 'treatUrlsAndPointersAsImages' cannot be False when 'treatUrlsAndPointersAsMedia' is True."
)
@@ -161,7 +156,10 @@ def to_marqo_index_request(self, index_name: str) -> MarqoIndexRequest:
if self.treatUrlsAndPointersAsImages is None:
# Default value for treat_urls_and_pointers_as_images is False, but we can't set it in the model
# as it is not a valid parameter for structured indexes
- self.treatUrlsAndPointersAsImages = False
+ if self.treatUrlsAndPointersAsMedia is True:
+ self.treatUrlsAndPointersAsImages = True
+ else:
+ self.treatUrlsAndPointersAsImages = False

if self.treatUrlsAndPointersAsMedia is None:
# Default value for treat_urls_and_pointers_as_media is False, but we can't set it in the model
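Taken together, the validator and `to_marqo_index_request` changes make an unset `treatUrlsAndPointersAsImages` follow `treatUrlsAndPointersAsMedia` instead of defaulting to `False`. A standalone restatement of the resulting rule (illustrative only, not the Marqo source):

```python
from typing import Optional

def resolve_image_flag(treat_as_images: Optional[bool], treat_as_media: Optional[bool]) -> bool:
    """Illustrative restatement of the defaulting rule after this change."""
    if treat_as_images is None:
        # An unset image flag now inherits the media flag rather than defaulting to False.
        return bool(treat_as_media)
    if treat_as_images is False and treat_as_media:
        raise ValueError(
            "'treatUrlsAndPointersAsImages' cannot be False when 'treatUrlsAndPointersAsMedia' is True."
        )
    return treat_as_images

assert resolve_image_flag(None, True) is True    # previously surfaced the InvalidArgError fixed here
assert resolve_image_flag(None, None) is False   # both flags unset still defaults to False
assert resolve_image_flag(True, False) is True   # image-only indexes keep working (with a deprecation warning)
```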
2 changes: 1 addition & 1 deletion src/marqo/version.py
@@ -1,4 +1,4 @@
__version__ = "2.12.0"
__version__ = "2.12.1"

def get_version() -> str:
return f"{__version__}"
10 changes: 4 additions & 6 deletions tests/core/index_management/test_get_settings.py
@@ -95,7 +95,7 @@ def test_default_settings(self):
'textPreprocessing': {'splitLength': 2,
'splitMethod': TextSplitMethod.Sentence,
'splitOverlap': 0},
- 'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
+ 'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3},
'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'treatUrlsAndPointersAsImages': False,
'treatUrlsAndPointersAsMedia': False,
@@ -136,7 +136,7 @@ def test_default_settings(self):
'splitMethod': TextSplitMethod.Sentence,
'splitOverlap': 0
},
- 'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
+ 'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3},
'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'type': IndexType.Structured,
'vectorNumericType': VectorNumericType.Float
@@ -165,7 +165,7 @@ def test_custom_settings(self):
'textPreprocessing': {'splitLength': 3,
'splitMethod': TextSplitMethod.Word,
'splitOverlap': 1},
- 'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
+ 'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3},
'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'treatUrlsAndPointersAsImages': False,
'treatUrlsAndPointersAsMedia': False,
@@ -175,7 +175,6 @@ def test_custom_settings(self):
# Get unstructured custom settings
retrieved_index = self.config.index_management.get_index(self.unstructured_custom_index.name)
retrieved_settings = IndexSettings.from_marqo_index(retrieved_index).dict(exclude_none=True, by_alias=True)
print(f"retrieved_settings: {retrieved_settings}")
self.assertEqual(retrieved_settings, expected_unstructured_custom_settings)

with self.subTest("Structured index custom settings"):
@@ -206,14 +205,13 @@ def test_custom_settings(self):
'splitMethod': TextSplitMethod.Word,
'splitOverlap': 1
},
- 'audioPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
+ 'audioPreprocessing': {'splitLength': 10, 'splitOverlap': 3},
'videoPreprocessing': {'splitLength': 20, 'splitOverlap': 3},
'type': IndexType.Structured,
'vectorNumericType': VectorNumericType.Float
}
# Get unstructured default settings
retrieved_index = self.config.index_management.get_index(self.structured_custom_index.name)
retrieved_settings = IndexSettings.from_marqo_index(retrieved_index).dict(exclude_none=True, by_alias=True)
print(f"retrieved_settings: {retrieved_settings}")
self.assertEqual(retrieved_settings, expected_structured_custom_settings)

2 changes: 1 addition & 1 deletion tests/s2_inference/model_downloading/test_from_hf.py
@@ -3,7 +3,7 @@
from marqo.s2_inference.errors import ModelDownloadError
from marqo.tensor_search.models.external_apis.hf import HfAuth, HfModelLocation
from marqo.s2_inference.model_downloading.from_hf import download_model_from_hf
- from huggingface_hub.utils._errors import RepositoryNotFoundError
+ from huggingface_hub.errors import RepositoryNotFoundError
from marqo.s2_inference.configs import ModelCache


Additional test file (file name not captured)
@@ -179,7 +179,8 @@ def test_add_document_callVectoriseWithoutPassingEnableCache(self):
"vectorise for add_documents")
mock_vectorise.reset_mock()

- @pytest.mark.skipif(torch.cuda.is_available() is True, reason="GPU testing device needs to be investigated")
+ @pytest.mark.largemodel
+ @pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support")
def test_add_multimodal_single_documents(self):
""" """
documents = [
@@ -240,7 +241,8 @@ def test_add_multimodal_single_documents(self):
self.assertNotIn(embedding, embeddings, f"Duplicate embedding found in document {i}")
embeddings.append(embedding)

- @pytest.mark.skipif(torch.cuda.is_available() is True, reason="GPU testing device needs to be investigated")
+ @pytest.mark.largemodel
+ @pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support")
def test_add_multimodal_field_document(self):
multimodal_document = {
"_id": "1_multimodal",
3 changes: 2 additions & 1 deletion tests/tensor_search/integ_tests/test_embed.py
@@ -272,7 +272,8 @@ def test_embed_image_url_as_image_not_text(self):
msg=f"Mismatch at index {i} for {index.type}")


- @pytest.mark.skipif(torch.cuda.is_available() is True, reason="Skip this test if we have cuda support.")
+ @pytest.mark.largemodel
+ @pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support")
def test_embed_languagebind(self):
content = [
#TestImageUrls.HIPPO_REALISTIC.value, # image
6 changes: 4 additions & 2 deletions tests/tensor_search/integ_tests/test_search_combined.py
@@ -198,7 +198,8 @@ def tearDown(self) -> None:
super().tearDown()
self.device_patcher.stop()

- @pytest.mark.skipif(torch.cuda.is_available() is True, reason="We skip this test if we have cuda support. This model is 5gb and is very slow on g4dn.xlarge and may crash it")
+ @pytest.mark.largemodel
+ @pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support")
def test_search_video(self):
documents = [
{"video_field_1": "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4", "_id": "1"},
@@ -232,7 +233,8 @@ def test_search_video(self):
self.assertEqual(results['hits'][0]['_id'], "1") # The video document should be the top result
self.assertGreater(results['hits'][0]['_score'], results['hits'][1]['_score']) # Video should have higher score

- @pytest.mark.skipif(torch.cuda.is_available() is True, reason="We skip this test if we have cuda support. This model is 5gb and is very slow on g4dn.xlarge and may crash it")
+ @pytest.mark.largemodel
+ @pytest.mark.skipif(torch.cuda.is_available() is False, reason="We skip the large model test if we don't have cuda support")
def test_search_audio(self):
documents = [
{"video_field_1": "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4", "_id": "1"},
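The test changes swap the old "skip when CUDA is present" guards for a `largemodel` marker plus a "skip unless CUDA is present" guard. A minimal sketch of the pattern (marker registration in the pytest config is assumed and not shown in this diff):

```python
import pytest
import torch

@pytest.mark.largemodel
@pytest.mark.skipif(not torch.cuda.is_available(), reason="large-model tests need CUDA")
def test_languagebind_smoke():
    # Stand-in body; the real tests index and search video/audio documents.
    assert torch.cuda.is_available()
```

On a GPU runner these tests can then be selected with `pytest -m largemodel` and excluded elsewhere with `pytest -m "not largemodel"`.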
1 change: 0 additions & 1 deletion tests/test_documentation.py
@@ -5,7 +5,6 @@

from marqo import marqo_docs

- @pytest.mark.skip(reason="skipping since we have a CI pipeline for this")
class TestDocumentation(unittest.TestCase):
def test_urls(self):
# Retrieve all public functions in the module