Skip to content

Commit

Permalink
Implement file path sanitation and relevant unit tests (Issue #137)
Browse files Browse the repository at this point in the history
  • Loading branch information
henrybear327 committed Nov 23, 2023
1 parent adb66b1 commit 087dd28
Show file tree
Hide file tree
Showing 8 changed files with 260 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ build/mini-gdbstub
build/softfloat
build/cache/
build/map/
build/path/
*.o
*.o.d
tests/**/*.elf
Expand Down
36 changes: 35 additions & 1 deletion mk/tests.mk
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,20 @@ MAP_TEST_SRCDIR := tests/map
MAP_TEST_OUTDIR:= build/map
MAP_TEST_TARGET := $(MAP_TEST_OUTDIR)/test-map

PATH_TEST_SRCDIR := tests/path
PATH_TEST_OUTDIR := build/path
PATH_TEST_TARGET := $(PATH_TEST_OUTDIR)/test-path

CACHE_TEST_OBJS := \
test-cache.o

MAP_TEST_OBJS := \
test-map.o \
mt19937.o

PATH_TEST_OBJS := \
test-path.o

CACHE_TEST_OBJS := $(addprefix $(CACHE_TEST_OUTDIR)/, $(CACHE_TEST_OBJS)) \
$(OUT)/cache.o $(OUT)/mpool.o
OBJS += $(CACHE_TEST_OBJS)
Expand All @@ -23,6 +30,11 @@ MAP_TEST_OBJS := $(addprefix $(MAP_TEST_OUTDIR)/, $(MAP_TEST_OBJS)) \
OBJS += $(MAP_TEST_OBJS)
deps += $(MAP_TEST_OBJS:%.o=%.o.d)

PATH_TEST_OBJS := $(addprefix $(PATH_TEST_OUTDIR)/, $(PATH_TEST_OBJS)) \
$(OUT)/utils.o
OBJS += $(PATH_TEST_OBJS)
deps += $(PATH_TEST_OBJS:%.o=%.o.d)

# Check adaptive replacement cache policy is enabled or not, default is LFU
ifeq ($(ENABLE_ARC), 1)
CACHE_TEST_ACTIONS := \
Expand All @@ -43,8 +55,9 @@ endif

CACHE_TEST_OUT = $(addprefix $(CACHE_TEST_OUTDIR)/, $(CACHE_TEST_ACTIONS:%=%.out))
MAP_TEST_OUT = $(MAP_TEST_TARGET).out
PATH_TEST_OUT = $(PATH_TEST_TARGET).out

tests : run-test-cache run-test-map
tests : run-test-cache run-test-map run-test-path

run-test-cache: $(CACHE_TEST_OUT)
$(Q)$(foreach e,$(CACHE_TEST_ACTIONS),\
Expand All @@ -66,6 +79,15 @@ run-test-map: $(MAP_TEST_OUT)
$(PRINTF) "Failed.\n"; \
fi;

# Run the path test binary, then print a status line for it.
# NOTE(review): the test binary is invoked on its own recipe line, while the
# `$$?` check below sits in the same shell command as the $(VECHO) call that
# precedes it. The status being tested therefore looks like that of the echo,
# not of the test binary -- confirm against the definitions of Q and VECHO.
run-test-path: $(PATH_TEST_OUT)
$(Q)$(PATH_TEST_TARGET)
$(VECHO) "Running test-path ... "; \
if [ $$? -eq 0 ]; then \
$(call notice, [OK]); \
else \
$(PRINTF) "Failed.\n"; \
fi;

$(CACHE_TEST_OUT): $(CACHE_TEST_TARGET)
$(Q)$(foreach e,$(CACHE_TEST_ACTIONS),\
$(CACHE_TEST_TARGET) $(CACHE_TEST_SRCDIR)/$(e).in > $(CACHE_TEST_OUTDIR)/$(e).out; \
Expand All @@ -91,3 +113,15 @@ $(MAP_TEST_OUTDIR)/%.o: $(MAP_TEST_SRCDIR)/%.c
$(VECHO) " CC\t$@\n"
$(Q)mkdir -p $(dir $@)
$(Q)$(CC) -o $@ $(CFLAGS) -I./src -c -MMD -MF $@.d $<

# Touch the .out file once the test-path binary is available; run-test-path
# depends on this file.
$(PATH_TEST_OUT): $(PATH_TEST_TARGET)
$(Q)touch $@

# Link the test-path executable from all of its object files.
$(PATH_TEST_TARGET): $(PATH_TEST_OBJS)
$(VECHO) " CC\t$@\n"
$(Q)$(CC) $^ -o $@

# Compile a path test source into the test output directory, creating the
# directory if needed and emitting a .d dependency file next to the object.
$(PATH_TEST_OUTDIR)/%.o: $(PATH_TEST_SRCDIR)/%.c
$(VECHO) " CC\t$@\n"
$(Q)mkdir -p $(dir $@)
$(Q)$(CC) -o $@ $(CFLAGS) -I./src -c -MMD -MF $@.d $<
1 change: 1 addition & 0 deletions mk/tools.mk
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ HIST_OBJS := \
elf.o \
decode.o \
mpool.o \
utils.o \
rv_histogram.o

HIST_OBJS := $(addprefix $(OUT)/, $(HIST_OBJS))
Expand Down
21 changes: 18 additions & 3 deletions src/elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "elf.h"
#include "io.h"
#include "utils.h"

#if defined(_WIN32)
/* fallback to standard I/O text stream */
Expand Down Expand Up @@ -290,16 +291,23 @@ bool elf_load(elf_t *e, riscv_t *rv, memory_t *mem)
return true;
}

bool elf_open(elf_t *e, const char *path)
bool elf_open(elf_t *e, const char *input)
{
/* free previous memory */
if (e->raw_data)
release(e);

char *path = sanitize_path(input);
if (!path) {
return false;
}

#if defined(USE_MMAP)
int fd = open(path, O_RDONLY);
if (fd < 0)
if (fd < 0) {
free(path);
return false;
}

/* get file size */
struct stat st;
Expand All @@ -312,21 +320,25 @@ bool elf_open(elf_t *e, const char *path)
e->raw_data = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (e->raw_data == MAP_FAILED) {
release(e);
free(path);
return false;
}
close(fd);

#else /* fallback to standard I/O text stream */
FILE *f = fopen(path, "rb");
if (!f)
if (!f) {
free(path);
return false;
}

/* get file size */
fseek(f, 0, SEEK_END);
e->raw_size = ftell(f);
fseek(f, 0, SEEK_SET);
if (e->raw_size == 0) {
fclose(f);
free(path);
return false;
}

Expand All @@ -339,6 +351,7 @@ bool elf_open(elf_t *e, const char *path)
fclose(f);
if (r != e->raw_size) {
release(e);
free(path);
return false;
}
#endif /* USE_MMAP */
Expand All @@ -349,9 +362,11 @@ bool elf_open(elf_t *e, const char *path)
/* check it is a valid ELF file */
if (!is_valid(e)) {
release(e);
free(path);
return false;
}

free(path);
return true;
}

Expand Down
1 change: 1 addition & 0 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "elf.h"
#include "state.h"
#include "utils.h"

/* enable program trace mode */
static bool opt_trace = false;
Expand Down
99 changes: 99 additions & 0 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
* "LICENSE" for information on usage and redistribution of this file.
*/

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>

Expand All @@ -21,6 +24,8 @@
#endif
#endif

#define MAX_PATH_LEN 1024

static void get_time_info(int32_t *tv_sec, int32_t *tv_usec)
{
#if defined(HAVE_POSIX_TIMER)
Expand Down Expand Up @@ -67,3 +72,97 @@ void rv_clock_gettime(struct timespec *tp)
tp->tv_sec = tv_sec;
tp->tv_nsec = tv_usec / 1000; /* Transfer to microseconds */
}

/*
 * Lexically clean a path: collapse repeated slashes, drop "." elements,
 * resolve inner ".." elements against the element before them, and discard
 * ".." elements that would climb above the root of an absolute path.
 *
 * Only the first MAX_PATH_LEN bytes of input are considered.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL if input is NULL or the allocation fails. An input that
 * cleans to nothing (including the empty string) yields ".".
 */
char *sanitize_path(const char *input)
{
    if (!input) {
        return NULL;
    }

    /* Bounded length scan; avoids relying on the POSIX-only strnlen(). */
    size_t n = 0;
    while (n < MAX_PATH_LEN && input[n] != '\0') {
        n++;
    }

    /* The cleaned path is never longer than the input, with one exception:
     * an empty input becomes ".". Reserve room for at least one character
     * plus the terminator so that case cannot run off the buffer. */
    size_t cap = (n > 0 ? n : 1) + 1;
    char *ret = calloc(cap, sizeof(char));
    if (!ret) {
        return NULL;
    }

    bool is_root = (input[0] == '/');

    /*
     * Invariants:
     *   r is the index of the next input byte to process.
     *   w is the index of the next output byte to write (w <= r).
     *   dotdot is the output index that ".." must not backtrack past,
     *   either because it is the leading slash or because it is the end of
     *   a leading "../../.." prefix.
     */
    size_t w = 0;
    size_t r = 0;
    size_t dotdot = 0;
    if (is_root) {
        ret[w++] = '/';
        r = 1;
        dotdot = 1;
    }

    while (r < n) {
        if (input[r] == '/') {
            /* empty path element */
            r++;
        } else if (input[r] == '.' && (r + 1 == n || input[r + 1] == '/')) {
            /* "." element */
            r++;
        } else if (input[r] == '.' && input[r + 1] == '.' &&
                   (r + 2 == n || input[r + 2] == '/')) {
            /* ".." element: remove output back to the previous slash */
            r += 2;

            if (w > dotdot) {
                /* can backtrack */
                w--;
                while (w > dotdot && ret[w] != '/') {
                    w--;
                }
            } else if (!is_root) {
                /* cannot backtrack and not rooted: keep the ".." element */
                if (w > 0) {
                    ret[w++] = '/';
                }
                ret[w++] = '.';
                ret[w++] = '.';
                dotdot = w;
            }
        } else {
            /* real path element; add a separator unless it is the first */
            if ((is_root && w != 1) || (!is_root && w != 0)) {
                ret[w++] = '/';
            }

            /* copy the element */
            for (; r < n && input[r] != '/'; r++) {
                ret[w++] = input[r];
            }
        }
    }

    /* Turn an empty result into "." */
    if (w == 0) {
        ret[w++] = '.';
    }

    /* Backtracking may have left stale bytes after w; terminate the string
     * exactly at w. w never exceeds cap - 1, so this stays in bounds. */
    ret[w] = '\0';

    return ret;
}
26 changes: 26 additions & 0 deletions src/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,29 @@ void rv_clock_gettime(struct timespec *tp);
/* 0x61C88647 is 32-bit golden ratio */ \
return (val * 0x61C88647 >> (32 - size_bits)) & ((size) - (1)); \
}

/*
* Reference:
* https://cs.opensource.google/go/go/+/refs/tags/go1.21.4:src/path/path.go;l=51
*
* sanitize_path returns the shortest path name equivalent to path
* by purely lexical processing. It applies the following rules
* iteratively until no further processing can be done:
*
* 1. Replace multiple slashes with a single slash.
* 2. Eliminate each . path name element (the current directory).
* 3. Eliminate each inner .. path name element (the parent directory)
* along with the non-.. element that precedes it.
* 4. Eliminate .. elements that begin a rooted path:
* that is, replace "/.." by "/" at the beginning of a path.
*
* The returned path ends in a slash only if it is the root "/".
*
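* If the result of this process is an empty string, sanitize_path
* returns the string ".".
*
* The returned string is heap-allocated; the caller is responsible for
* releasing it with free().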
*
* See also Rob Pike, “Lexical File Names in Plan 9 or
* Getting Dot-Dot Right,”
* https://9p.io/sys/doc/lexnames.html
*/
char *sanitize_path(const char *input);
Loading

0 comments on commit 087dd28

Please sign in to comment.