Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow links to have file:// prefix - but don't write them that way by default #1489

Merged
merged 17 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pystac/stac_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def read_text_from_href(self, href: str) -> str:
except HTTPError as e:
raise Exception(f"Could not read uri {href}") from e
else:
href = safe_urlparse(href).path
with open(href, encoding="utf-8") as f:
href_contents = f.read()
return href_contents
Expand All @@ -328,7 +329,7 @@ def write_text_to_href(self, href: str, txt: str) -> None:
"""
if _is_url(href):
raise NotImplementedError("DefaultStacIO cannot write to urls")
href = os.fspath(href)
href = safe_urlparse(href).path
dirname = os.path.dirname(href)
if dirname != "" and not os.path.isdir(dirname):
os.makedirs(dirname)
Expand Down Expand Up @@ -391,7 +392,7 @@ def _report_duplicate_object_names(

def _is_url(href: str) -> bool:
parsed = safe_urlparse(href)
return parsed.scheme != ""
return parsed.scheme not in ["", "file"]


if HAS_URLLIB3:
Expand Down
31 changes: 28 additions & 3 deletions pystac/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,25 @@
query=parsed.query,
fragment=parsed.fragment,
)

# Windows drives sometimes get parsed as the netloc and sometimes
# as part of the parsed.path.
if parsed.scheme == "file" and os.name == "nt":
gadomski marked this conversation as resolved.
Show resolved Hide resolved
if parsed.netloc:
path = f"{parsed.netloc}{parsed.path}"

Check warning on line 79 in pystac/utils.py

View check run for this annotation

Codecov / codecov/patch

pystac/utils.py#L79

Added line #L79 was not covered by tests
elif parsed.path.startswith("/") and ":" in parsed.path:
path = parsed.path[1:]

Check warning on line 81 in pystac/utils.py

View check run for this annotation

Codecov / codecov/patch

pystac/utils.py#L81

Added line #L81 was not covered by tests
else:
path = parsed.path

Check warning on line 83 in pystac/utils.py

View check run for this annotation

Codecov / codecov/patch

pystac/utils.py#L83

Added line #L83 was not covered by tests

return URLParseResult(

Check warning on line 85 in pystac/utils.py

View check run for this annotation

Codecov / codecov/patch

pystac/utils.py#L85

Added line #L85 was not covered by tests
scheme=parsed.scheme,
netloc="",
path=path,
params=parsed.params,
query=parsed.query,
fragment=parsed.fragment,
)
else:
return parsed

Expand Down Expand Up @@ -246,7 +265,7 @@
):
return source_href

if parsed_start.scheme == "":
if parsed_start.scheme in ["", "file"]:
return _make_relative_href_path(parsed_source, parsed_start, start_is_dir)
else:
return _make_relative_href_url(parsed_source, parsed_start, start_is_dir)
Expand Down Expand Up @@ -311,6 +330,9 @@
make_posix_style(os.path.abspath(start_dir)), start_dir
)

if parsed_source.scheme or parsed_start.scheme:
abs_path = f"file://{abs_path}"

return abs_path


Expand Down Expand Up @@ -346,7 +368,10 @@
parsed_start = safe_urlparse(start_href)
parsed_source = safe_urlparse(source_href)

if parsed_source.scheme != "" or parsed_start.scheme != "":
if parsed_source.scheme not in ["", "file"] or parsed_start.scheme not in [
"",
"file",
]:
return _make_absolute_href_url(parsed_source, parsed_start, start_is_dir)
else:
return _make_absolute_href_path(parsed_source, parsed_start, start_is_dir)
Expand All @@ -364,7 +389,7 @@
bool: ``True`` if the given HREF is absolute, ``False`` if it is relative.
"""
parsed = safe_urlparse(href)
return parsed.scheme != "" or os.path.isabs(parsed.path)
return parsed.scheme not in ["", "file"] or os.path.isabs(parsed.path)


def datetime_to_str(dt: datetime, timespec: str = "auto") -> str:
Expand Down
9 changes: 9 additions & 0 deletions tests/test_stac_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ def test_read_write_collection(self) -> None:
pystac.write_file(collection, dest_href=dest_href)
self.assertTrue(os.path.exists(dest_href), msg="File was not written.")

def test_read_write_collection_with_file_protocol(self) -> None:
collection = pystac.read_file(
"file://" + TestCases.get_path("data-files/collections/multi-extent.json")
)
with tempfile.TemporaryDirectory() as tmp_dir:
dest_href = os.path.join(tmp_dir, "collection.json")
pystac.write_file(collection, dest_href="file://" + dest_href)
self.assertTrue(os.path.exists(dest_href), msg="File was not written.")

def test_read_item(self) -> None:
item = pystac.read_file(TestCases.get_path("data-files/item/sample-item.json"))
with tempfile.TemporaryDirectory() as tmp_dir:
Expand Down
19 changes: 18 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ def test_make_relative_href(self) -> None:
("/a/catalog.json", "/a/b/c/catalog.json", "../../catalog.json"),
("/a/b/c/d/", "/a/b/c/catalog.json", "./d/"),
("/a/b/c/d/.dotfile", "/a/b/c/d/catalog.json", "./.dotfile"),
(
"file:///a/b/c/d/catalog.json",
"file:///a/b/c/catalog.json",
"./d/catalog.json",
),
]

for source_href, start_href, expected in test_cases:
Expand Down Expand Up @@ -161,11 +166,22 @@ def test_make_absolute_href(self) -> None:
"https://stacspec.org/a/b/item.json",
),
("http://localhost:8000", None, "http://localhost:8000"),
("item.json", "file:///a/b/c/catalog.json", "file:///a/b/c/item.json"),
(
"./z/item.json",
"file:///a/b/c/catalog.json",
"file:///a/b/c/z/item.json",
),
("file:///a/b/c/item.json", None, "file:///a/b/c/item.json"),
]

for source_href, start_href, expected in test_cases:
actual = make_absolute_href(source_href, start_href)
_, actual = os.path.splitdrive(actual)
if expected.startswith("file://"):
_, actual = os.path.splitdrive(actual.replace("file://", ""))
actual = f"file://{actual}"
else:
_, actual = os.path.splitdrive(actual)
self.assertEqual(actual, expected)

def test_make_absolute_href_on_vsitar(self) -> None:
Expand Down Expand Up @@ -234,6 +250,7 @@ def test_is_absolute_href_os_aware(self) -> None:
test_cases = [
("/item.json", not incl_drive_letter),
("/home/someuser/Downloads/item.json", not incl_drive_letter),
("file:///home/someuser/Downloads/item.json", not incl_drive_letter),
("d:/item.json", is_windows),
("c:/files/more_files/item.json", is_windows),
]
Expand Down
Loading