Skip to content

Commit

Permalink
Merge pull request #654 from martindurant/memdirs
Browse files Browse the repository at this point in the history
Fix memory directories and add tests
  • Loading branch information
martindurant authored Jun 4, 2021
2 parents 5bfd3d8 + b956197 commit 462b03a
Show file tree
Hide file tree
Showing 10 changed files with 170 additions and 106 deletions.
2 changes: 2 additions & 0 deletions fsspec/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ def m():
"""
m = fsspec.filesystem("memory")
m.store.clear()
m.pseudo_dirs.clear()
try:
yield m
finally:
m.store.clear()
m.pseudo_dirs.clear()


@pytest.fixture
Expand Down
1 change: 1 addition & 0 deletions fsspec/fuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def __init__(self, fs, path):
self.cache = {}
self.root = path.rstrip("/") + "/"
self.counter = 0
logger.info("Starting FUSE at %s", path)

def getattr(self, path, fh=None):
logger.debug("getattr %s", path)
Expand Down
179 changes: 109 additions & 70 deletions fsspec/implementations/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,92 +18,104 @@ class MemoryFileSystem(AbstractFileSystem):
"""

store = {} # global
pseudo_dirs = []
pseudo_dirs = [""]
protocol = "memory"
root_marker = ""
root_marker = "/"

@classmethod
def _strip_protocol(cls, path):
if path.startswith("memory://"):
path = path[len("memory://") :]
if "::" in path or "://" in path:
return path.rstrip("/")
path = path.lstrip("/").rstrip("/")
return "/" + path if path else ""

def ls(self, path, detail=False, **kwargs):
path = self._strip_protocol(path)
if path in self.store:
# there is a key with this exact name, but could also be directory
out = [
# there is a key with this exact name
return [
{
"name": path,
"size": self.store[path].getbuffer().nbytes,
"type": "file",
"created": self.store[path].created,
}
]
else:
out = []
path = path.strip("/").lstrip("/")
paths = set()
starter = path + "/"
out = []
for p2 in self.store:
has_slash = "/" if p2.startswith("/") else ""
p = p2.lstrip("/")
if "/" in p:
root = p.rsplit("/", 1)[0]
else:
root = ""
if root == path:
out.append(
{
"name": has_slash + p,
"size": self.store[p2].getbuffer().nbytes,
"type": "file",
"created": self.store[p2].created,
}
)
elif (
path
and len(path) < len(p.strip("/"))
and all(
(a == b) for a, b in zip(path.split("/"), p.strip("/").split("/"))
)
):
# implicit directory
ppath = "/".join(p.split("/")[: len(path.split("/")) + 1])
if ppath not in paths:
out.append(
{
"name": has_slash + ppath + "/",
"size": 0,
"type": "directory",
}
)
paths.add(ppath)
elif all(
(a == b)
for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
):
# root directory entry
ppath = p.rstrip("/").split("/", 1)[0]
if ppath not in paths:
if p2.startswith(starter):
if "/" not in p2[len(starter) :]:
# exact child
out.append(
{
"name": has_slash + ppath + "/",
"size": 0,
"type": "directory",
"name": p2,
"size": self.store[p2].getbuffer().nbytes,
"type": "file",
"created": self.store[p2].created,
}
)
paths.add(ppath)
elif len(p2) > len(starter):
# implied child directory
ppath = starter + p2[len(starter) :].split("/", 1)[0]
if ppath not in paths:
out = out or []
out.append(
{
"name": ppath,
"size": 0,
"type": "directory",
}
)
paths.add(ppath)
for p2 in self.pseudo_dirs:
if self._parent(p2).strip("/") == path and p2.strip("/") not in paths:
out.append({"name": p2 + "/", "size": 0, "type": "directory"})
if p2.startswith(starter):
if "/" not in p2[len(starter) :]:
# exact child pdir
if p2 not in paths:
out.append({"name": p2, "size": 0, "type": "directory"})
paths.add(p2)
else:
# directory implied by deeper pdir
ppath = starter + p2[len(starter) :].split("/", 1)[0]
if ppath not in paths:
out.append({"name": ppath, "size": 0, "type": "directory"})
paths.add(ppath)
if not out:
if path in self.pseudo_dirs:
# empty dir
return []
raise FileNotFoundError(path)
if detail:
return out
return sorted([f["name"] for f in out])

def mkdir(self, path, create_parents=True, **kwargs):
path = path.rstrip("/")
if create_parents and self._parent(path):
self.mkdir(self._parent(path), create_parents, **kwargs)
if self._parent(path) and not self.isdir(self._parent(path)):
path = self._strip_protocol(path)
if path in self.store or path in self.pseudo_dirs:
raise FileExistsError
if self._parent(path).strip("/") and self.isfile(self._parent(path)):
raise NotADirectoryError(self._parent(path))
if create_parents and self._parent(path).strip("/"):
try:
self.mkdir(self._parent(path), create_parents, **kwargs)
except FileExistsError:
pass
if path and path not in self.pseudo_dirs:
self.pseudo_dirs.append(path)

def makedirs(self, path, exist_ok=False):
try:
self.mkdir(path, create_parents=True)
except FileExistsError:
if not exist_ok:
raise

def rmdir(self, path):
path = path.rstrip("/")
path = self._strip_protocol(path)
if path in self.pseudo_dirs:
if not self.ls(path):
self.pseudo_dirs.remove(path)
Expand All @@ -116,6 +128,26 @@ def exists(self, path):
path = self._strip_protocol(path)
return path in self.store or path in self.pseudo_dirs

def info(self, path, **kwargs):
path = self._strip_protocol(path)
if path in self.pseudo_dirs or any(
p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs
):
return {
"name": path,
"size": 0,
"type": "directory",
}
elif path in self.store:
return {
"name": path,
"size": self.store[path].getbuffer().nbytes,
"type": "file",
"created": self.store[path].created,
}
else:
raise FileNotFoundError(path)

def _open(
self,
path,
Expand All @@ -125,6 +157,14 @@ def _open(
cache_options=None,
**kwargs,
):
path = self._strip_protocol(path)
if path in self.pseudo_dirs:
raise IsADirectoryError
parent = path
while len(parent) > 1:
parent = self._parent(parent)
if self.isfile(parent):
raise FileExistsError(parent)
if mode in ["rb", "ab", "rb+"]:
if path in self.store:
f = self.store[path]
Expand All @@ -144,6 +184,8 @@ def _open(
return m

def cp_file(self, path1, path2, **kwargs):
path1 = self._strip_protocol(path1)
path2 = self._strip_protocol(path2)
if self.isfile(path1):
self.store[path2] = MemoryFile(self, path2, self.store[path1].getbuffer())
elif self.isdir(path1):
Expand All @@ -153,18 +195,18 @@ def cp_file(self, path1, path2, **kwargs):
raise FileNotFoundError

def cat_file(self, path, start=None, end=None, **kwargs):
path = self._strip_protocol(path)
try:
return self.store[path].getvalue()[start:end]
except KeyError:
raise FileNotFoundError(path)

def _rm(self, path):
if self.isfile(path):
path = self._strip_protocol(path)
try:
del self.store[path]
elif self.isdir(path):
self.rmdir(path)
else:
raise FileNotFoundError
except KeyError as e:
raise FileNotFoundError from e

def rm(self, path, recursive=False, maxdepth=None):
paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
Expand All @@ -175,13 +217,10 @@ def rm(self, path, recursive=False, maxdepth=None):
# directories first.
if not self.exists(p):
continue
self.rm_file(p)

def size(self, path):
"""Size in bytes of the file at path"""
if path not in self.store:
raise FileNotFoundError(path)
return self.store[path].getbuffer().nbytes
if self.isfile(p):
self.rm_file(p)
else:
self.rmdir(p)


class MemoryFile(BytesIO):
Expand Down
8 changes: 4 additions & 4 deletions fsspec/implementations/tests/test_cached.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,9 +693,9 @@ def test_multi_cache_chain(protocol):
def test_strip(protocol):
fs = fsspec.filesystem(protocol, target_protocol="memory")
url1 = "memory://afile"
assert fs._strip_protocol(url1) == "afile"
assert fs._strip_protocol(protocol + "://afile") == "afile"
assert fs._strip_protocol(protocol + "::memory://afile") == "afile"
assert fs._strip_protocol(url1) == "/afile"
assert fs._strip_protocol(protocol + "://afile") == "/afile"
assert fs._strip_protocol(protocol + "::memory://afile") == "/afile"


@pytest.mark.parametrize("protocol", ["simplecache", "filecache"])
Expand All @@ -713,7 +713,7 @@ def test_expiry():

d = tempfile.mkdtemp()
fs = fsspec.filesystem("memory")
fn = "afile"
fn = "/afile"
fn0 = "memory://afile"
data = b"hello"
with fs.open(fn0, "wb") as f:
Expand Down
4 changes: 2 additions & 2 deletions fsspec/implementations/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ def setup():
def test_basic(cli):

fs = fsspec.filesystem("dask", target_protocol="memory")
assert fs.ls("") == ["afile"]
assert fs.cat("afile") == b"data"
assert fs.ls("") == ["/afile"]
assert fs.cat("/afile") == b"data"
Loading

0 comments on commit 462b03a

Please sign in to comment.