⚡️ Speed up method S3DataSource.put_bucket_versioning by 48%
#630
📄 48% (0.48x) speedup for S3DataSource.put_bucket_versioning in backend/python/app/sources/external/s3/s3.py
⏱️ Runtime: 1.34 milliseconds → 909 microseconds (best of 230 runs)
📝 Explanation and details
The optimized code achieves a 47% runtime speedup by eliminating redundant S3 client creation overhead through client connection pooling.
Key Optimization: Cached S3 Client Context
- Original: opened a new async with session.client('s3') context for every put_bucket_versioning call, requiring connection setup/teardown each time.
- Optimized: adds a _get_s3_client() method that caches the S3 client instance (_s3_client_instance) and reuses the same connection across multiple calls.
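As a rough illustration, the cached-client pattern described above looks roughly like the following sketch (assuming an aioboto3-style session; the _get_s3_client(), _s3_client_instance, and _finalize() names follow the description, while the rest of S3DataSource is simplified):

class S3DataSourceSketch:
    """Illustrative sketch only; not the actual S3DataSource implementation."""

    def __init__(self, session):
        self._session = session
        self._s3_client_ctx = None       # cached "async with" context
        self._s3_client_instance = None  # cached S3 client handle

    async def _get_s3_client(self):
        # Enter the client context once and reuse the client on later calls.
        if self._s3_client_instance is None:
            self._s3_client_ctx = self._session.client('s3')
            self._s3_client_instance = await self._s3_client_ctx.__aenter__()
        return self._s3_client_instance

    async def put_bucket_versioning(self, **kwargs):
        s3_client = await self._get_s3_client()
        return await s3_client.put_bucket_versioning(**kwargs)

    async def _finalize(self):
        # Close the cached context so the connection is not leaked.
        if self._s3_client_ctx is not None:
            await self._s3_client_ctx.__aexit__(None, None, None)
            self._s3_client_ctx = None
            self._s3_client_instance = None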
Performance Impact Analysis:
The line profiler shows the critical improvement in put_bucket_versioning: the original spent 1.48762e+06 ns across 1274 hits entering async with session.client('s3'), a per-call cost the cached client avoids.
Throughput Benefits:
Test Case Performance:
The optimization particularly benefits workloads with repeated calls on the same S3DataSource instance, where the cached client connection can be reused across requests.
Resource Management:
The optimization includes proper cleanup via the _finalize() and __aexit__() methods, ensuring the cached client context is properly closed and preventing resource leaks while maintaining the performance benefits of connection reuse.
✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
import asyncio # used to run async functions
from typing import Any, Dict, Optional
import pytest # used for our unit tests
from app.sources.external.s3.s3 import S3DataSource
# Minimal stub for aioboto3.Session and s3_client
class DummyS3Client:
    def __init__(self, response=None, raise_client_error=False, raise_exception=False):
        self.response = response
        self.raise_client_error = raise_client_error
        self.raise_exception = raise_exception
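    # NOTE: assumed completion -- the stub's method body was truncated in this
    # listing; it mimics put_bucket_versioning on a real aioboto3 S3 client.
    async def put_bucket_versioning(self, **kwargs):
        if self.raise_client_error:
            raise ClientError(
                {'Error': {'Code': 'AccessDenied', 'Message': 'Access Denied'}},
                'PutBucketVersioning'
            )
        if self.raise_exception:
            raise Exception("Unexpected error")
        return self.response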
class DummySession:
    def __init__(self, s3_client):
        self._s3_client = s3_client
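    # NOTE: assumed completion -- S3DataSource is expected to use
    # "async with session.client('s3')", so client() returns a minimal
    # async context manager that yields the dummy client.
    def client(self, service_name, *args, **kwargs):
        s3_client = self._s3_client

        class _ClientContext:
            async def __aenter__(self):
                return s3_client

            async def __aexit__(self, exc_type, exc, tb):
                return False

        return _ClientContext()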
class DummyS3ClientWrapper:
    def __init__(self, session):
        self._session = session
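    # NOTE: assumed completion -- mirrors the second test file's DummyS3ClientBuilder,
    # where S3DataSource obtains the session via get_session().
    def get_session(self):
        return self._session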
# --- Function under test (EXACT COPY) ---
try:
    import aioboto3  # type: ignore
    from botocore.exceptions import ClientError  # type: ignore
except ImportError:
    # For test environment, we don't actually need aioboto3
    aioboto3 = None
    ClientError = Exception
# --- TESTS ---
@pytest.mark.asyncio
async def test_put_bucket_versioning_basic_success():
# Basic test: normal successful response
dummy_response = {'Versioning': 'Enabled'}
s3_client = DummyS3Client(response=dummy_response)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
result = await s3ds.put_bucket_versioning(
Bucket="my-bucket",
VersioningConfiguration={"Status": "Enabled"}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_basic_error_dict():
# Basic test: S3 returns error in dict
dummy_response = {'Error': {'Code': 'InvalidRequest', 'Message': 'Invalid bucket'}}
s3_client = DummyS3Client(response=dummy_response)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
result = await s3ds.put_bucket_versioning(
Bucket="bad-bucket",
VersioningConfiguration={"Status": "Suspended"}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_basic_none_response():
# Edge case: None response from S3
s3_client = DummyS3Client(response=None)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
result = await s3ds.put_bucket_versioning(
Bucket="my-bucket",
VersioningConfiguration={"Status": "Enabled"}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_with_all_optional_parameters():
# Basic test: All optional parameters provided
dummy_response = {'Versioning': 'Enabled'}
s3_client = DummyS3Client(response=dummy_response)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
result = await s3ds.put_bucket_versioning(
Bucket="my-bucket",
VersioningConfiguration={"Status": "Enabled"},
ChecksumAlgorithm="SHA256",
MFA="123456",
ExpectedBucketOwner="ownerid"
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_clienterror_handling():
# Edge case: Simulate ClientError exception
s3_client = DummyS3Client(raise_client_error=True)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
result = await s3ds.put_bucket_versioning(
Bucket="forbidden-bucket",
VersioningConfiguration={"Status": "Enabled"}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_unexpected_exception():
# Edge case: Simulate generic exception
s3_client = DummyS3Client(raise_exception=True)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
result = await s3ds.put_bucket_versioning(
Bucket="my-bucket",
VersioningConfiguration={"Status": "Enabled"}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_concurrent_execution():
# Edge case: Run multiple concurrent requests
dummy_response = {'Versioning': 'Enabled'}
s3_client = DummyS3Client(response=dummy_response)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
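    # NOTE: assumed reconstruction -- the concurrent calls were truncated in this
    # listing; the pattern mirrors the throughput tests below.
    tasks = [
        s3ds.put_bucket_versioning(
            Bucket=f"bucket-{i}",
            VersioningConfiguration={"Status": "Enabled"}
        ) for i in range(5)
    ]
    results = await asyncio.gather(*tasks)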
@pytest.mark.asyncio
async def test_put_bucket_versioning_large_scale_concurrent():
# Large scale: Run 100 concurrent requests
dummy_response = {'Versioning': 'Enabled'}
s3_client = DummyS3Client(response=dummy_response)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
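    # NOTE: assumed reconstruction -- the concurrent calls were truncated in this listing.
    tasks = [
        s3ds.put_bucket_versioning(
            Bucket=f"bucket-{i}",
            VersioningConfiguration={"Status": "Enabled"}
        ) for i in range(100)
    ]
    results = await asyncio.gather(*tasks)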
@pytest.mark.asyncio
async def test_put_bucket_versioning_edge_missing_bucket():
# Edge case: Missing required Bucket parameter (should raise TypeError)
dummy_response = {'Versioning': 'Enabled'}
s3_client = DummyS3Client(response=dummy_response)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
with pytest.raises(TypeError):
await s3ds.put_bucket_versioning(
VersioningConfiguration={"Status": "Enabled"}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_edge_missing_versioning_configuration():
# Edge case: Missing required VersioningConfiguration parameter (should raise TypeError)
dummy_response = {'Versioning': 'Enabled'}
s3_client = DummyS3Client(response=dummy_response)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
with pytest.raises(TypeError):
await s3ds.put_bucket_versioning(
Bucket="my-bucket"
)
# --- Throughput tests ---
@pytest.mark.asyncio
async def test_put_bucket_versioning_throughput_small_load():
# Throughput test: 10 requests
dummy_response = {'Versioning': 'Enabled'}
s3_client = DummyS3Client(response=dummy_response)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
tasks = [
s3ds.put_bucket_versioning(
Bucket=f"bucket-{i}",
VersioningConfiguration={"Status": "Enabled"}
) for i in range(10)
]
results = await asyncio.gather(*tasks)
@pytest.mark.asyncio
async def test_put_bucket_versioning_throughput_medium_load():
# Throughput test: 50 requests
dummy_response = {'Versioning': 'Enabled'}
s3_client = DummyS3Client(response=dummy_response)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
tasks = [
s3ds.put_bucket_versioning(
Bucket=f"bucket-{i}",
VersioningConfiguration={"Status": "Enabled"}
) for i in range(50)
]
results = await asyncio.gather(*tasks)
@pytest.mark.asyncio
async def test_put_bucket_versioning_throughput_high_load():
# Throughput test: 200 requests (stress test, but <1000)
dummy_response = {'Versioning': 'Enabled'}
s3_client = DummyS3Client(response=dummy_response)
session = DummySession(s3_client)
client_wrapper = DummyS3ClientWrapper(session)
s3ds = S3DataSource(client_wrapper)
tasks = [
s3ds.put_bucket_versioning(
Bucket=f"bucket-{i}",
VersioningConfiguration={"Status": "Enabled"}
) for i in range(200)
]
results = await asyncio.gather(*tasks)
@pytest.mark.asyncio
async def test_put_bucket_versioning_throughput_mixed_load():
# Throughput test: Mix of successful and error responses
dummy_response = {'Versioning': 'Enabled'}
error_response = {'Error': {'Code': 'InvalidRequest', 'Message': 'Invalid bucket'}}
clients = [
DummyS3Client(response=dummy_response) if i % 2 == 0 else DummyS3Client(response=error_response)
for i in range(20)
]
sessions = [DummySession(c) for c in clients]
wrappers = [DummyS3ClientWrapper(s) for s in sessions]
sources = [S3DataSource(w) for w in wrappers]
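    # NOTE: assumed reconstruction -- the gather step was truncated in this listing.
    coros = [
        src.put_bucket_versioning(
            Bucket=f"bucket-{i}",
            VersioningConfiguration={"Status": "Enabled"}
        ) for i, src in enumerate(sources)
    ]
    results = await asyncio.gather(*coros)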
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
import asyncio # used to run async functions
from typing import Any, Dict, Optional
import pytest # used for our unit tests
from app.sources.external.s3.s3 import S3DataSource
class ClientError(Exception):
    def __init__(self, error_response, operation_name):
        self.response = error_response
        self.operation_name = operation_name
        super().__init__(str(error_response))
class DummyS3Client:
    """Mock S3 client with put_bucket_versioning method."""
    def __init__(self, response=None, raise_client_error=False, raise_exception=False):
        self.response = response
        self.raise_client_error = raise_client_error
        self.raise_exception = raise_exception
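    # NOTE: assumed completion -- the method body was truncated in this listing.
    async def put_bucket_versioning(self, **kwargs):
        if self.raise_client_error:
            raise ClientError(
                {'Error': {'Code': 'AccessDenied', 'Message': 'Access Denied'}},
                'PutBucketVersioning'
            )
        if self.raise_exception:
            raise Exception("Unexpected error")
        return self.response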
class DummySessionClient:
    """Mock aioboto3 session.client context manager."""
    def __init__(self, s3_client):
        self.s3_client = s3_client
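    # NOTE: assumed completion -- acts as the async context manager returned by
    # DummySession.client(), yielding the wrapped mock S3 client.
    async def __aenter__(self):
        return self.s3_client

    async def __aexit__(self, exc_type, exc, tb):
        return False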
class DummySession:
    """Mock aioboto3.Session with client() returning an async context manager."""
    def __init__(self, s3_client):
        self.s3_client = s3_client
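    # NOTE: assumed completion -- returns the mock context manager above, matching
    # the "async with session.client('s3')" usage described in the explanation.
    def client(self, service_name, *args, **kwargs):
        return DummySessionClient(self.s3_client)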
class DummyS3ClientBuilder:
    """Mock S3Client.get_session() returning DummySession."""
    def __init__(self, s3_client):
        self._session = DummySession(s3_client)
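    # NOTE: assumed completion -- per the class docstring, get_session() hands the
    # DummySession to S3DataSource.
    def get_session(self):
        return self._session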
# ------------------- UNIT TESTS -------------------
# 1. Basic Test Cases
@pytest.mark.asyncio
async def test_put_bucket_versioning_basic_success():
"""Test basic successful call with required parameters."""
# Simulate S3 API returns a dict with no error
response = {'Versioning': 'Enabled'}
s3_client = DummyS3ClientBuilder(DummyS3Client(response=response))
ds = S3DataSource(s3_client)
result = await ds.put_bucket_versioning(
Bucket='mybucket',
VersioningConfiguration={'Status': 'Enabled'}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_basic_error_response_dict():
"""Test S3 returns error in response dict."""
error_response = {'Error': {'Code': 'AccessDenied', 'Message': 'You do not have permission'}}
s3_client = DummyS3ClientBuilder(DummyS3Client(response=error_response))
ds = S3DataSource(s3_client)
result = await ds.put_bucket_versioning(
Bucket='mybucket',
VersioningConfiguration={'Status': 'Suspended'}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_basic_none_response():
"""Test S3 returns None as response."""
s3_client = DummyS3ClientBuilder(DummyS3Client(response=None))
ds = S3DataSource(s3_client)
result = await ds.put_bucket_versioning(
Bucket='mybucket',
VersioningConfiguration={'Status': 'Enabled'}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_basic_non_dict_response():
"""Test S3 returns non-dict response (e.g., string)."""
s3_client = DummyS3ClientBuilder(DummyS3Client(response="OK"))
ds = S3DataSource(s3_client)
result = await ds.put_bucket_versioning(
Bucket='mybucket',
VersioningConfiguration={'Status': 'Suspended'}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_basic_optional_parameters():
"""Test all optional parameters are passed and handled."""
response = {'Versioning': 'Enabled'}
s3_client = DummyS3ClientBuilder(DummyS3Client(response=response))
ds = S3DataSource(s3_client)
result = await ds.put_bucket_versioning(
Bucket='bucket123',
VersioningConfiguration={'Status': 'Enabled'},
ChecksumAlgorithm='SHA256',
MFA='123456 789012',
ExpectedBucketOwner='ownerid'
)
# 2. Edge Test Cases
@pytest.mark.asyncio
async def test_put_bucket_versioning_clienterror():
"""Test S3 raises ClientError exception."""
s3_client = DummyS3ClientBuilder(DummyS3Client(raise_client_error=True))
ds = S3DataSource(s3_client)
result = await ds.put_bucket_versioning(
Bucket='bucket',
VersioningConfiguration={'Status': 'Enabled'}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_unexpected_exception():
"""Test S3 raises unexpected exception."""
s3_client = DummyS3ClientBuilder(DummyS3Client(raise_exception=True))
ds = S3DataSource(s3_client)
result = await ds.put_bucket_versioning(
Bucket='bucket',
VersioningConfiguration={'Status': 'Enabled'}
)
@pytest.mark.asyncio
async def test_put_bucket_versioning_concurrent_execution():
"""Test concurrent execution of multiple put_bucket_versioning calls."""
# Each call returns a different response
responses = [
{'Versioning': 'Enabled'},
{'Versioning': 'Suspended'},
{'Error': {'Code': 'AccessDenied', 'Message': 'No permission'}}
]
s3_clients = [
DummyS3ClientBuilder(DummyS3Client(response=responses[0])),
DummyS3ClientBuilder(DummyS3Client(response=responses[1])),
DummyS3ClientBuilder(DummyS3Client(response=responses[2]))
]
data_sources = [S3DataSource(c) for c in s3_clients]
coros = [
ds.put_bucket_versioning(Bucket=f'bucket{i}', VersioningConfiguration={'Status': 'Enabled'})
for i, ds in enumerate(data_sources)
]
results = await asyncio.gather(*coros)
@pytest.mark.asyncio
async def test_put_bucket_versioning_invalid_versioning_configuration():
"""Test invalid VersioningConfiguration value."""
# S3 returns error in response dict
error_response = {'Error': {'Code': 'MalformedXML', 'Message': 'The XML you provided was not well-formed'}}
s3_client = DummyS3ClientBuilder(DummyS3Client(response=error_response))
ds = S3DataSource(s3_client)
result = await ds.put_bucket_versioning(
Bucket='bucket',
VersioningConfiguration={'InvalidKey': 'InvalidValue'}
)
# 3. Large Scale Test Cases
@pytest.mark.asyncio
async def test_put_bucket_versioning_many_concurrent_calls():
"""Test large scale concurrent execution (up to 50 calls)."""
N = 50
response = {'Versioning': 'Enabled'}
s3_clients = [DummyS3ClientBuilder(DummyS3Client(response=response)) for _ in range(N)]
data_sources = [S3DataSource(c) for c in s3_clients]
coros = [
ds.put_bucket_versioning(Bucket=f'bucket{i}', VersioningConfiguration={'Status': 'Enabled'})
for i, ds in enumerate(data_sources)
]
results = await asyncio.gather(*coros)
for r in results:
pass
@pytest.mark.asyncio
async def test_put_bucket_versioning_many_concurrent_errors():
"""Test large scale concurrent errors (up to 20 calls)."""
N = 20
error_response = {'Error': {'Code': 'AccessDenied', 'Message': 'Denied'}}
s3_clients = [DummyS3ClientBuilder(DummyS3Client(response=error_response)) for _ in range(N)]
data_sources = [S3DataSource(c) for c in s3_clients]
coros = [
ds.put_bucket_versioning(Bucket=f'bucket{i}', VersioningConfiguration={'Status': 'Suspended'})
for i, ds in enumerate(data_sources)
]
results = await asyncio.gather(*coros)
for r in results:
pass
# 4. Throughput Test Cases
@pytest.mark.asyncio
async def test_put_bucket_versioning_throughput_small_load():
"""Throughput test: small load (5 concurrent calls)."""
N = 5
response = {'Versioning': 'Enabled'}
s3_clients = [DummyS3ClientBuilder(DummyS3Client(response=response)) for _ in range(N)]
data_sources = [S3DataSource(c) for c in s3_clients]
coros = [
ds.put_bucket_versioning(Bucket=f'bucket{i}', VersioningConfiguration={'Status': 'Enabled'})
for i, ds in enumerate(data_sources)
]
results = await asyncio.gather(*coros)
for r in results:
pass
@pytest.mark.asyncio
async def test_put_bucket_versioning_throughput_medium_load():
"""Throughput test: medium load (25 concurrent calls)."""
N = 25
response = {'Versioning': 'Suspended'}
s3_clients = [DummyS3ClientBuilder(DummyS3Client(response=response)) for _ in range(N)]
data_sources = [S3DataSource(c) for c in s3_clients]
coros = [
ds.put_bucket_versioning(Bucket=f'bucket{i}', VersioningConfiguration={'Status': 'Suspended'})
for i, ds in enumerate(data_sources)
]
results = await asyncio.gather(*coros)
for r in results:
pass
@pytest.mark.asyncio
async def test_put_bucket_versioning_throughput_high_volume():
"""Throughput test: high volume (100 concurrent calls)."""
N = 100
response = {'Versioning': 'Enabled'}
s3_clients = [DummyS3ClientBuilder(DummyS3Client(response=response)) for _ in range(N)]
data_sources = [S3DataSource(c) for c in s3_clients]
coros = [
ds.put_bucket_versioning(Bucket=f'bucket{i}', VersioningConfiguration={'Status': 'Enabled'})
for i, ds in enumerate(data_sources)
]
results = await asyncio.gather(*coros)
for r in results:
pass
@pytest.mark.asyncio
async def test_put_bucket_versioning_throughput_mixed_success_and_error():
"""Throughput test: mix of success and error responses."""
N = 30
success_response = {'Versioning': 'Enabled'}
error_response = {'Error': {'Code': 'AccessDenied', 'Message': 'Denied'}}
s3_clients = []
for i in range(N):
if i % 2 == 0:
s3_clients.append(DummyS3ClientBuilder(DummyS3Client(response=success_response)))
else:
s3_clients.append(DummyS3ClientBuilder(DummyS3Client(response=error_response)))
data_sources = [S3DataSource(c) for c in s3_clients]
coros = [
ds.put_bucket_versioning(Bucket=f'bucket{i}', VersioningConfiguration={'Status': 'Enabled' if i % 2 == 0 else 'Suspended'})
for i, ds in enumerate(data_sources)
]
results = await asyncio.gather(*coros)
for i, r in enumerate(results):
if i % 2 == 0:
pass
else:
pass
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes, run git checkout codeflash/optimize-S3DataSource.put_bucket_versioning-mhx9to8v and push.