Skip to content

Commit d17e1d9

Browse files
Ericson2314edolstraroberth
committed
Purify CanonPath
The core `CanonPath` constructors were using `absPath`, but `absPath` in some situations does IO which is not appropriate. It turns out that these constructors avoided those situations, and thus were pure, but it was far from obvious this was the case. To remedy the situation, abstract the core algorithm from `canonPath` to use separately in `CanonPath` without any IO. No we know by-construction that those constructors are pure. That leaves `CanonPath::fromCWD` as the only operation which uses IO / is impure. Add docs on it, and `CanonPath` as a whole, explaining the situation. This is also necessary to support Windows paths on windows without messing up `CanonPath`. But, I think it is good even without that. Co-authored-by: Eelco Dolstra <edolstra@gmail.com> Co-authored-by: Robert Hensing <roberth@users.noreply.github.com>
1 parent d53c890 commit d17e1d9

File tree

5 files changed

+155
-57
lines changed

5 files changed

+155
-57
lines changed

src/libutil/canon-path.cc

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,25 @@
11
#include "canon-path.hh"
2-
#include "file-system.hh"
2+
#include "util.hh"
3+
#include "file-path-impl.hh"
34

45
namespace nix {
56

67
CanonPath CanonPath::root = CanonPath("/");
78

9+
static std::string absPathPure(std::string_view path)
10+
{
11+
return canonPathInner(path, [](auto &, auto &){});
12+
}
13+
814
CanonPath::CanonPath(std::string_view raw)
9-
: path(absPath(raw, "/"))
15+
: path(absPathPure(concatStrings("/", raw)))
1016
{ }
1117

1218
CanonPath::CanonPath(std::string_view raw, const CanonPath & root)
13-
: path(absPath(raw, root.abs()))
19+
: path(absPathPure(
20+
raw.size() > 0 && raw[0] == '/'
21+
? raw
22+
: concatStrings(root.abs(), "/", raw)))
1423
{ }
1524

1625
CanonPath::CanonPath(const std::vector<std::string> & elems)

src/libutil/canon-path.hh

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,21 @@ namespace nix {
2121
*
2222
* - There are no components equal to '.' or '..'.
2323
*
24-
* Note that the path does not need to correspond to an actually
25-
* existing path, and there is no guarantee that symlinks are
26-
* resolved.
24+
* `CanonPath` are "virtual" Nix paths for abstract file system objects;
25+
* they are always Unix-style paths, regardless of what OS Nix is
26+
* running on. The `/` root doesn't denote the ambient host file system
27+
* root, but some virtual FS root.
28+
*
29+
* @note It might be useful to compare `openat(some_fd, "foo/bar")` on
30+
* Unix. `"foo/bar"` is a relative path because an absolute path would
31+
* "override" the `some_fd` directory file descriptor and escape to the
32+
* "system root". Conversely, Nix's abstract file operations *never* escape the
33+
* designated virtual file system (i.e. `SourceAccessor` or
34+
* `ParseSink`), so `CanonPath` does not need an absolute/relative
35+
* distinction.
36+
*
37+
* @note The path does not need to correspond to an actually existing
38+
* path, and the path may or may not have unresolved symlinks.
2739
*/
2840
class CanonPath
2941
{

src/libutil/file-path-impl.hh

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#pragma once
2+
/**
3+
* @file
4+
*
5+
* Pure (no IO) infrastructure just for defining other path types;
6+
* should not be used directly outside of utilities.
7+
*/
8+
#include <string>
9+
#include <string_view>
10+
11+
namespace nix {
12+
13+
/**
14+
* Core pure path canonicalization algorithm.
15+
*
16+
* @param hookComponent
17+
* A callback which is passed two arguments,
18+
* references to
19+
*
20+
* 1. the result so far
21+
*
22+
* 2. the remaining path to resolve
23+
*
24+
* This is a chance to modify those two paths in arbitrary way, e.g. if
25+
* "result" points to a symlink.
26+
*/
27+
typename std::string canonPathInner(
28+
std::string_view remaining,
29+
auto && hookComponent)
30+
{
31+
assert(remaining != "");
32+
33+
std::string result;
34+
result.reserve(256);
35+
36+
while (true) {
37+
38+
/* Skip slashes. */
39+
while (!remaining.empty() && remaining[0] == '/')
40+
remaining.remove_prefix(1);
41+
42+
if (remaining.empty()) break;
43+
44+
auto nextComp = ({
45+
auto nextPathSep = remaining.find('/');
46+
nextPathSep == remaining.npos ? remaining : remaining.substr(0, nextPathSep);
47+
});
48+
49+
/* Ignore `.'. */
50+
if (nextComp == ".")
51+
remaining.remove_prefix(1);
52+
53+
/* If `..', delete the last component. */
54+
else if (nextComp == "..")
55+
{
56+
if (!result.empty()) result.erase(result.rfind('/'));
57+
remaining.remove_prefix(2);
58+
}
59+
60+
/* Normal component; copy it. */
61+
else {
62+
result += '/';
63+
if (const auto slash = remaining.find('/'); slash == result.npos) {
64+
result += remaining;
65+
remaining = {};
66+
} else {
67+
result += remaining.substr(0, slash);
68+
remaining = remaining.substr(slash);
69+
}
70+
71+
hookComponent(result, remaining);
72+
}
73+
}
74+
75+
if (result.empty())
76+
result = "/";
77+
78+
return result;
79+
}
80+
81+
}

src/libutil/file-system.cc

Lines changed: 29 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "environment-variables.hh"
22
#include "file-system.hh"
3+
#include "file-path-impl.hh"
34
#include "signals.hh"
45
#include "finally.hh"
56
#include "serialise.hh"
@@ -21,11 +22,18 @@ namespace fs = std::filesystem;
2122

2223
namespace nix {
2324

25+
/** Treat the string as possibly an absolute path, by inspecting the start of it. Return whether it was probably intended to be absolute. */
26+
static bool isAbsolute(PathView path)
27+
{
28+
return !path.empty() && path[0] == '/';
29+
}
30+
31+
2432
Path absPath(PathView path, std::optional<PathView> dir, bool resolveSymlinks)
2533
{
2634
std::string scratch;
2735

28-
if (path.empty() || path[0] != '/') {
36+
if (!isAbsolute(path)) {
2937
// In this case we need to call `canonPath` on a newly-created
3038
// string. We set `scratch` to that string first, and then set
3139
// `path` to `scratch`. This ensures the newly-created string
@@ -58,69 +66,39 @@ Path canonPath(PathView path, bool resolveSymlinks)
5866
{
5967
assert(path != "");
6068

61-
std::string s;
62-
s.reserve(256);
63-
64-
if (path[0] != '/')
69+
if (!isAbsolute(path))
6570
throw Error("not an absolute path: '%1%'", path);
6671

72+
/* This just exists because we cannot set the target of `remaining`
73+
(the callback parameter) directly to a newly-constructed string,
74+
since it is `std::string_view`. */
6775
std::string temp;
6876

6977
/* Count the number of times we follow a symlink and stop at some
7078
arbitrary (but high) limit to prevent infinite loops. */
7179
unsigned int followCount = 0, maxFollow = 1024;
7280

73-
while (1) {
74-
75-
/* Skip slashes. */
76-
while (!path.empty() && path[0] == '/') path.remove_prefix(1);
77-
if (path.empty()) break;
78-
79-
/* Ignore `.'. */
80-
if (path == "." || path.substr(0, 2) == "./")
81-
path.remove_prefix(1);
82-
83-
/* If `..', delete the last component. */
84-
else if (path == ".." || path.substr(0, 3) == "../")
85-
{
86-
if (!s.empty()) s.erase(s.rfind('/'));
87-
path.remove_prefix(2);
88-
}
89-
90-
/* Normal component; copy it. */
91-
else {
92-
s += '/';
93-
if (const auto slash = path.find('/'); slash == path.npos) {
94-
s += path;
95-
path = {};
96-
} else {
97-
s += path.substr(0, slash);
98-
path = path.substr(slash);
99-
}
100-
101-
/* If s points to a symlink, resolve it and continue from there */
102-
if (resolveSymlinks && isLink(s)) {
81+
return canonPathInner(
82+
path,
83+
[&followCount, &temp, maxFollow, resolveSymlinks]
84+
(std::string & result, std::string_view & remaining) {
85+
if (resolveSymlinks && isLink(result)) {
10386
if (++followCount >= maxFollow)
104-
throw Error("infinite symlink recursion in path '%1%'", path);
105-
temp = concatStrings(readLink(s), path);
106-
path = temp;
107-
if (!temp.empty() && temp[0] == '/') {
108-
s.clear(); /* restart for symlinks pointing to absolute path */
87+
throw Error("infinite symlink recursion in path '%0%'", remaining);
88+
remaining = (temp = concatStrings(readLink(result), remaining));
89+
if (isAbsolute(remaining)) {
90+
/* restart for symlinks pointing to absolute path */
91+
result.clear();
10992
} else {
110-
s = dirOf(s);
111-
if (s == "/") { // we don’t want trailing slashes here, which dirOf only produces if s = /
112-
s.clear();
93+
result = dirOf(result);
94+
if (result == "/") {
95+
/* we don’t want trailing slashes here, which `dirOf`
96+
only produces if `result = /` */
97+
result.clear();
11398
}
11499
}
115100
}
116-
}
117-
}
118-
119-
if (s.empty()) {
120-
s = "/";
121-
}
122-
123-
return s;
101+
});
124102
}
125103

126104

tests/unit/libutil/canon-path.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,24 @@ namespace nix {
4141
}
4242
}
4343

44+
TEST(CanonPath, from_existing) {
45+
CanonPath p0("foo//bar/");
46+
{
47+
CanonPath p("/baz//quux/", p0);
48+
ASSERT_EQ(p.abs(), "/baz/quux");
49+
ASSERT_EQ(p.rel(), "baz/quux");
50+
ASSERT_EQ(*p.baseName(), "quux");
51+
ASSERT_EQ(*p.dirOf(), "/baz");
52+
}
53+
{
54+
CanonPath p("baz//quux/", p0);
55+
ASSERT_EQ(p.abs(), "/foo/bar/baz/quux");
56+
ASSERT_EQ(p.rel(), "foo/bar/baz/quux");
57+
ASSERT_EQ(*p.baseName(), "quux");
58+
ASSERT_EQ(*p.dirOf(), "/foo/bar/baz");
59+
}
60+
}
61+
4462
TEST(CanonPath, pop) {
4563
CanonPath p("foo/bar/x");
4664
ASSERT_EQ(p.abs(), "/foo/bar/x");

0 commit comments

Comments
 (0)