Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed a common_utils bug when using file_list with no matching files #61

Merged
merged 11 commits into from
Dec 14, 2023
2 changes: 1 addition & 1 deletion braingeneers/data/test_data/maxwell-metadata.expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,4 @@
"data_format": "NeurodataWithoutBorders"
}
}
}
}
2 changes: 1 addition & 1 deletion braingeneers/data/test_data/maxwell-metadata.old.json
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,4 @@
]
}
}
}
}
22 changes: 10 additions & 12 deletions src/braingeneers/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import multiprocessing
import posixpath
import itertools
import pathlib


_s3_client = None # S3 client for boto3, lazy initialization performed in _lazy_init_s3_client()
Expand Down Expand Up @@ -99,25 +100,22 @@ def file_list(filepath: str) -> List[Tuple[str, str, int]]:
"""
Returns a list of files, last modified time, and size on local or S3 in descending order of last modified time

:param filepath: Local or S3 file path to list, example: "local/dir/" or "s3://braingeneers/ephys/
:param filepath: Local or S3 file path to list, example: "local/dir/" or "s3://bucket/prefix/"
:return: A list of tuples of [('fileA', 'last_modified_A', size), ('fileB', 'last_modified_B', size), ...]
"""
files_and_details = []

if filepath.startswith('s3://'):
s3_client = _lazy_init_s3_client()
o = urllib.parse.urlparse(filepath)
response = s3_client.list_objects(Bucket=o.netloc, Prefix=o.path[1:])

if 'Contents' not in response:
if raise_on_missing:
raise FileNotFoundError(filepath)
else:
return [(o.path[1:].split('/')[-1], 'Missing')]

files_and_details = [
(f['Key'].split('/')[-1], str(f['LastModified']), int(f['Size']))
for f in sorted(response['Contents'], key=lambda x: x['LastModified'], reverse=True)
]
else:
if 'Contents' in response:
files_and_details = [
(f['Key'].split('/')[-1], str(f['LastModified']), int(f['Size']))
for f in sorted(response['Contents'], key=lambda x: x['LastModified'], reverse=True)
]
elif os.path.exists(filepath):
files = sorted(pathlib.Path(filepath).iterdir(), key=os.path.getmtime, reverse=True)
files_and_details = [(f.name, str(f.stat().st_mtime), f.stat().st_size) for f in files]

Expand Down
49 changes: 49 additions & 0 deletions src/braingeneers/utils/common_utils_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import unittest
from unittest.mock import patch, MagicMock
import common_utils # Updated import statement
import os
import tempfile


class TestFileListFunction(unittest.TestCase):

@patch('common_utils._lazy_init_s3_client') # Updated to common_utils
def test_s3_files_exist(self, mock_s3_client):
# Mock S3 client response
mock_response = {
'Contents': [
{'Key': 'file1.txt', 'LastModified': '2023-01-01', 'Size': 123},
{'Key': 'file2.txt', 'LastModified': '2023-01-02', 'Size': 456}
]
}
mock_s3_client.return_value.list_objects.return_value = mock_response

result = common_utils.file_list('s3://test-bucket/') # Updated to common_utils
expected = [('file2.txt', '2023-01-02', 456), ('file1.txt', '2023-01-01', 123)]
self.assertEqual(result, expected)

@patch('common_utils._lazy_init_s3_client') # Updated to common_utils
def test_s3_no_files(self, mock_s3_client):
# Mock S3 client response for no files
mock_s3_client.return_value.list_objects.return_value = {}
result = common_utils.file_list('s3://test-bucket/') # Updated to common_utils
self.assertEqual(result, [])

def test_local_files_exist(self):
with tempfile.TemporaryDirectory() as temp_dir:
for f in ['tempfile1.txt', 'tempfile2.txt']:
with open(os.path.join(temp_dir, f), 'w') as w:
w.write('nothing')

result = common_utils.file_list(temp_dir) # Updated to common_utils
# The result should contain two files with their details
self.assertEqual(len(result), 2)

def test_local_no_files(self):
with tempfile.TemporaryDirectory() as temp_dir:
result = common_utils.file_list(temp_dir) # Updated to common_utils
self.assertEqual(result, [])


if __name__ == '__main__':
unittest.main()