Skip to content

Commit

Permalink
Merge branch 'main' into benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
charmoniumQ authored Jan 18, 2025
2 parents 5226cf2 + 906d89c commit d7e6be5
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 49 deletions.
38 changes: 32 additions & 6 deletions probe_src/frontend/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ fn main() -> Result<()> {
arg!(--debug "Run in verbose & debug build of libprobe.")
.required(false)
.value_parser(value_parser!(bool)),
arg!(-c --"copy-files" "Copy files that would be needed to re-execute the program.")
arg!(-e --"copy-files-eagerly" "Eagerly copy files that would be needed to re-execute the program.")
.required(false)
.value_parser(value_parser!(bool)),
arg!(-c --"copy-files-lazily" "lazily Copy files that would be needed to re-execute the program.")
.required(false)
.value_parser(value_parser!(bool)),
arg!(<CMD> ... "Command to execute under provenance.")
Expand Down Expand Up @@ -86,19 +89,42 @@ fn main() -> Result<()> {
let no_transcribe = sub.get_flag("no-transcribe");
let gdb = sub.get_flag("gdb");
let debug = sub.get_flag("debug");
let copy_files = sub.get_flag("copy-files");
let copy_files_eagerly = sub.get_flag("copy-files-eagerly");
let copy_files_lazily = sub.get_flag("copy-files-lazily");
let cmd = sub
.get_many::<OsString>("CMD")
.unwrap()
.cloned()
.collect::<Vec<_>>();

if no_transcribe {
record::record_no_transcribe(output, overwrite, gdb, debug, copy_files, cmd)
if copy_files_eagerly && copy_files_lazily {
Err(eyre!(
"Cannot copy files both eagerly and lazily; please discard one or both"
))
} else {
record::record_transcribe(output, overwrite, gdb, debug, copy_files, cmd)
if no_transcribe {
record::record_no_transcribe(
output,
overwrite,
gdb,
debug,
copy_files_eagerly,
copy_files_lazily,
cmd,
)
} else {
record::record_transcribe(
output,
overwrite,
gdb,
debug,
copy_files_eagerly,
copy_files_lazily,
cmd,
)
}
.wrap_err("Record command failed")
}
.wrap_err("Record command failed")
}
Some(("transcribe", sub)) => {
let overwrite = sub.get_flag("overwrite");
Expand Down
63 changes: 48 additions & 15 deletions probe_src/frontend/cli/src/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,25 @@ pub fn record_no_transcribe(
overwrite: bool,
gdb: bool,
debug: bool,
copy_files: bool,
copy_files_eagerly: bool,
copy_files_lazily: bool,
cmd: Vec<OsString>,
) -> Result<()> {
let cwd = PathBuf::from(".");
let output = match output {
Some(x) => fs::canonicalize(x).wrap_err("Failed to canonicalize record directory path")?,
None => std::env::current_dir()
.wrap_err("Failed to get CWD")?
.join("probe_record"),
Some(x) => {
let path: &Path = x.as_ref();
let path_parent = path.parent().unwrap_or(&cwd);
let dir_name = path.file_name().unwrap();
fs::canonicalize(path_parent)
.wrap_err("Failed to canonicalize record directory path")?
.join(dir_name)
}
None => {
let mut output = std::env::current_dir().wrap_err("Failed to get CWD")?;
output.push("probe_record");
output
}
};

if overwrite {
Expand All @@ -43,7 +54,8 @@ pub fn record_no_transcribe(
Recorder::new(cmd, record_dir)
.gdb(gdb)
.debug(debug)
.copy_files(copy_files)
.copy_files_eagerly(copy_files_eagerly)
.copy_files_lazily(copy_files_lazily)
.record()?;

Ok(())
Expand All @@ -55,7 +67,8 @@ pub fn record_transcribe(
overwrite: bool,
gdb: bool,
debug: bool,
copy_files: bool,
copy_files_eagerly: bool,
copy_files_lazily: bool,
cmd: Vec<OsString>,
) -> Result<()> {
let output = match output {
Expand All @@ -78,7 +91,8 @@ pub fn record_transcribe(
)
.gdb(gdb)
.debug(debug)
.copy_files(copy_files)
.copy_files_eagerly(copy_files_eagerly)
.copy_files_lazily(copy_files_lazily)
.record()?;

match transcribe::transcribe(&record_dir, &mut tar) {
Expand All @@ -102,7 +116,8 @@ pub fn record_transcribe(
pub struct Recorder {
gdb: bool,
debug: bool,
copy_files: bool,
copy_files_eagerly: bool,
copy_files_lazily: bool,

output: Dir,
cmd: Vec<OsString>,
Expand Down Expand Up @@ -149,8 +164,10 @@ impl Recorder {
.arg("--args")
.arg(self_bin)
.arg("__gdb-exec-shim")
.args(if self.copy_files {
std::vec!["--copy-files"]
.args(if self.copy_files_eagerly {
std::vec!["--copy-files-eagerly"]
} else if self.copy_files_lazily {
std::vec!["--copy-files-lazily"]
} else {
std::vec![]
})
Expand All @@ -170,7 +187,16 @@ impl Recorder {
.args(self.cmd)
.env_remove("__PROBE_LIB")
.env_remove("__PROBE_LOG")
.env("__PROBE_COPY_FILES", if self.copy_files { "1" } else { "" })
.env(
"__PROBE_COPY_FILES",
if self.copy_files_lazily {
"lazy"
} else if self.copy_files_eagerly {
"eager"
} else {
""
},
)
.env("__PROBE_DIR", self.output.path())
.env("LD_PRELOAD", ld_preload)
.spawn()
Expand Down Expand Up @@ -236,7 +262,8 @@ impl Recorder {
Self {
gdb: false,
debug: false,
copy_files: false,
copy_files_eagerly: false,
copy_files_lazily: false,
output,
cmd,
}
Expand All @@ -255,8 +282,14 @@ impl Recorder {
}

/// Set if probe should copy files needed to re-execute.
pub fn copy_files(mut self, copy_files: bool) -> Self {
self.copy_files = copy_files;
pub fn copy_files_eagerly(mut self, copy_files_eagerly: bool) -> Self {
self.copy_files_eagerly = copy_files_eagerly;
self
}

/// Set if probe should copy files needed to re-execute.
pub fn copy_files_lazily(mut self, copy_files_lazily: bool) -> Self {
self.copy_files_lazily = copy_files_lazily;
self
}
}
37 changes: 27 additions & 10 deletions probe_src/libprobe/src/global_state.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,25 +88,42 @@ static int get_exec_epoch_safe() {
return __exec_epoch;
}

static int __copy_files = -1;
static char __copy_files = ' ';
static const char* copy_files_env_var = PRIVATE_ENV_VAR_PREFIX "COPY_FILES";
struct InodeTable read_inodes;
struct InodeTable copied_or_overwritten_inodes;
static void init_copy_files() {
assert(__copy_files == -1);
assert(__copy_files == ' ');
const char* copy_files_str = debug_getenv(copy_files_env_var);
if (copy_files_str != NULL && copy_files_str[0] != 0) {
__copy_files = 1;
inode_table_init(&read_inodes);
inode_table_init(&copied_or_overwritten_inodes);
if (copy_files_str) {
__copy_files = copy_files_str[0];
} else {
__copy_files = 0;
__copy_files = '\0';
}
DEBUG("Is copy files? %d", __copy_files);
DEBUG("Copy files? %c", __copy_files);
switch (__copy_files) {
case '\0':
break;
case 'e': /* eagerly copy files */
case 'l': /* lazily copy files */
inode_table_init(&read_inodes);
inode_table_init(&copied_or_overwritten_inodes);
break;
default:
ERROR("copy_files has invalid value %c", __copy_files);
break;
}
}
/*
 * Reports whether the copy-files mode is "eager" ('e').
 * The mode must already hold one of its valid values ('\0', 'e', or 'l');
 * the initial placeholder value trips the assertion.
 */
static bool should_copy_files_eagerly() {
const char mode = __copy_files;
assert(mode == 'e' || mode == 'l' || mode == '\0');
return mode == 'e';
}
/*
 * Reports whether the copy-files mode is "lazy" ('l').
 * The mode must already hold one of its valid values ('\0', 'e', or 'l');
 * the initial placeholder value trips the assertion.
 */
static bool should_copy_files_lazily() {
const char mode = __copy_files;
assert(mode == 'e' || mode == 'l' || mode == '\0');
return mode == 'l';
}
static bool should_copy_files() {
assert(__copy_files == 1 || __copy_files == 0);
return __copy_files;
return should_copy_files_eagerly() || should_copy_files_lazily();
}

static int mkdir_and_descend(int my_dirfd, const char* name, long child, bool mkdir, bool close) {
Expand Down
57 changes: 40 additions & 17 deletions probe_src/libprobe/src/prov_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,24 @@ bool is_replace_op(struct Op op) {
return op.op_code == open_op_code && (op.data.open.flags & O_TRUNC || op.data.open.flags & O_CREAT);
}

int copy(const struct Path* path) {
int copy_to_store(const struct Path* path) {
static char dst_path[PATH_MAX];
path_to_id_string(path, dst_path);
return copy_file(path->dirfd_minus_at_fdcwd + AT_FDCWD, path->path, get_inodes_dirfd(), dst_path, path->size);
/*
** We take precautions to avoid copying a file that this process has already copied.
** But it may already have been copied by a different process!
** This is especially likely for coreutils, which are used in every script.
*/

int dst_dirfd = get_inodes_dirfd();
int access = unwrapped_faccessat(dst_dirfd, dst_path, F_OK, 0);
if (access == 0) {
DEBUG("Already exists %s %d", path->path, path->inode);
return 0;
} else {
DEBUG("Copying %s %d", path->path, path->inode);
return copy_file(path->dirfd_minus_at_fdcwd + AT_FDCWD, path->path, dst_dirfd, dst_path, path->size);
}
}

/*
Expand All @@ -46,26 +60,35 @@ static void prov_log_try(struct Op op) {

const struct Path* path = op_to_path(&op);
if (should_copy_files() && path->path && path->stat_valid) {
if (is_read_op(op)) {
DEBUG("Reading %s %d", path->path, path->inode);
inode_table_put_if_not_exists(&read_inodes, path);
} else if (is_mutate_op(op)) {
if (inode_table_put_if_not_exists(&copied_or_overwritten_inodes, path)) {
DEBUG("Mutating, but not copying %s %d since it is copied already or overwritten", path->path, path->inode);
} else {
DEBUG("Mutating, therefore copying %s %d", path->path, path->inode);
copy(path);
}
} else if (is_replace_op(op)) {
if (inode_table_contains(&read_inodes, path)) {
if (should_copy_files_lazily()) {
if (is_read_op(op)) {
DEBUG("Reading %s %d", path->path, path->inode);
inode_table_put_if_not_exists(&read_inodes, path);
} else if (is_mutate_op(op)) {
if (inode_table_put_if_not_exists(&copied_or_overwritten_inodes, path)) {
DEBUG("Mutating, but not copying %s %d since it is copied already or overwritten", path->path, path->inode);
} else {
DEBUG("Replace after read %s %d", path->path, path->inode);
copy(path);
DEBUG("Mutating, therefore copying %s %d", path->path, path->inode);
copy_to_store(path);
}
} else if (is_replace_op(op)) {
if (inode_table_contains(&read_inodes, path)) {
if (inode_table_put_if_not_exists(&copied_or_overwritten_inodes, path)) {
DEBUG("Mutating, but not copying %s %d since it is copied already or overwritten", path->path, path->inode);
} else {
DEBUG("Replace after read %s %d", path->path, path->inode);
copy_to_store(path);
}
} else {
DEBUG("Mutating, but not copying %s %d since it was never read", path->path, path->inode);
}
}
} else if (is_read_op(op) || is_mutate_op(op)) {
assert(should_copy_files_eagerly());
if (inode_table_put_if_not_exists(&copied_or_overwritten_inodes, path)) {
DEBUG("Not copying %s %d because already did", path->path, path->inode);
} else {
DEBUG("Mutating, but not copying %s %d since it was never read", path->path, path->inode);
copy_to_store(path);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion probe_src/libprobe/src/prov_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ static struct Path create_path_lazy(int dirfd, BORROWED const char* path, int fl
* Then again, this could happen in the tracee's code too...
* TODO: Remove this once I debug myself.
* */
assert(path == NULL || path[0] != '\0' || flags & AT_EMPTY_PATH);
//assert(path == NULL || (path[0] != '\0' || flags & AT_EMPTY_PATH));

/*
* if path == NULL, then the target is the dir specified by dirfd.
Expand Down
50 changes: 50 additions & 0 deletions probe_src/python/probe_py/manual/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import typing
import dataclasses
import json
from typing_extensions import Annotated
import pathlib
import typer
Expand Down Expand Up @@ -413,6 +416,53 @@ def nextflow(
script = g.generate_workflow(dataflow_graph)
output.write_text(script)


@export_app.command()
def ops_jsonl(
    probe_log: Annotated[
        pathlib.Path,
        typer.Argument(help="output file written by `probe record -o $file`."),
    ] = pathlib.Path("probe_log"),
) -> None:
    """
    Export each op to a JSON line.

    Every op of every thread, exec epoch, and process in the log is written to
    stdout as one JSON object on its own line, with the keys "pid", "tid",
    "exec_epoch_no", "i" (index of the op within its thread), "op", and
    "op_data_type" (class name of the op's data).

    The format is subject to change as PROBE evolves. Use with caution!
    """

    def decode_bytes(val: typing.Any) -> typing.Any:
        """Recursively replace bytes with str so the value is JSON-serializable."""
        if isinstance(val, bytes):
            # surrogateescape keeps undecodable bytes instead of raising
            return val.decode(errors="surrogateescape")
        if isinstance(val, dict):
            return {key: decode_bytes(item) for key, item in val.items()}
        if isinstance(val, (list, tuple)):
            # dataclasses.asdict also recurses into sequences, so bytes may hide here
            return [decode_bytes(item) for item in val]
        return val

    def filter_nested_dict(
        dct: typing.Mapping[typing.Any, typing.Any],
    ) -> typing.Mapping[typing.Any, typing.Any]:
        """Converts the bytes in a nested dict to a string"""
        return {key: decode_bytes(val) for key, val in dct.items()}

    stdout_console = rich.console.Console()
    prov_log = parse_probe_log(probe_log)
    for pid, process in prov_log.processes.items():
        for exec_epoch_no, exec_epoch in process.exec_epochs.items():
            for tid, thread in exec_epoch.threads.items():
                for i, op in enumerate(thread.ops):
                    record = {
                        "pid": pid,
                        "tid": tid,
                        "exec_epoch_no": exec_epoch_no,
                        "i": i,
                        "op": filter_nested_dict(
                            dataclasses.asdict(op),
                        ),
                        "op_data_type": type(op.data).__name__,
                    }
                    # print_json pretty-prints with indent=2 by default, which would
                    # spread one op over many lines; indent=None keeps it to one line.
                    stdout_console.print_json(json.dumps(record), indent=None)


# Example: scp Desktop/sample_example.txt root@136.183.142.28:/home/remote_dir
@app.command(
context_settings=dict(
Expand Down

0 comments on commit d7e6be5

Please sign in to comment.