From 3a38143c62553c72f669225da5dad1f260420238 Mon Sep 17 00:00:00 2001 From: Shri Javadekar Date: Mon, 6 May 2024 16:06:26 -0700 Subject: [PATCH] Increase Azure Blobstore connection_timeout. --- metaflow/plugins/datastores/azure_storage.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/metaflow/plugins/datastores/azure_storage.py b/metaflow/plugins/datastores/azure_storage.py index 9ab6dc1a959..0b3fe9ee787 100644 --- a/metaflow/plugins/datastores/azure_storage.py +++ b/metaflow/plugins/datastores/azure_storage.py @@ -143,12 +143,19 @@ def save_bytes_single( # It is good enough 99.9% of the time. # Depending on ResourceExistsError is more costly, though # we are still going to handle it right. + + # The default timeout in the Azure blobstore python SDK + # doesn't work well on slower network connections and large + # files, so connection_timeout is raised to 14400 seconds (4 hours) below. + # For more details, see this issue: + # https://github.com/Azure/azure-sdk-for-python/issues/23232 if overwrite or not blob.exists(): blob.upload_blob( byte_stream, overwrite=overwrite, metadata=metadata_to_upload, max_concurrency=AZURE_STORAGE_UPLOAD_MAX_CONCURRENCY, + connection_timeout=14400, ) except ResourceExistsError: if overwrite: