Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add -Lv/--line-verbose mode to rdump #102

Merged
merged 5 commits into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 45 additions & 8 deletions flow/record/adapter/line.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,81 @@
from flow.record import open_path_or_stream
from __future__ import annotations

from functools import lru_cache

from flow.record import Record, RecordDescriptor, open_path_or_stream
from flow.record.adapter import AbstractWriter
from flow.record.utils import is_stdout

__usage__ = """
Line output format adapter (writer only)
---
Write usage: rdump -w line://[PATH]
Write usage: rdump -w line://[PATH]?verbose=[VERBOSE]
[PATH]: path to file. Leave empty or "-" to output to stdout

Optional arguments:
[VERBOSE]: Also show fieldtype in line output (default: False)
"""


@lru_cache(maxsize=1024)
def field_types_for_record_descriptor(desc: RecordDescriptor) -> dict[str, str]:
"""Return dictionary of fieldname -> fieldtype for given RecordDescriptor.

Args:
desc: RecordDescriptor to get fieldtypes for
Returns:
Dictionary of fieldname -> fieldtype
"""
return {fname: fieldset.typename for fname, fieldset in desc.get_all_fields().items()}


class LineWriter(AbstractWriter):
"""Prints all fields and values of the Record on a separate line."""

fp = None

def __init__(self, path, fields=None, exclude=None, **kwargs):
def __init__(
self,
path: str,
*,
fields: list[str] | str | None = None,
exclude: list[str] | str | None = None,
verbose: bool = False,
**kwargs,
):
self.fp = open_path_or_stream(path, "wb")
self.count = 0
self.fields = fields
self.exclude = exclude
self.verbose = verbose
if isinstance(self.fields, str):
self.fields = self.fields.split(",")
if isinstance(self.exclude, str):
self.exclude = self.exclude.split(",")

def write(self, rec):
def write(self, rec: Record) -> None:
rdict = rec._asdict(fields=self.fields, exclude=self.exclude)
rdict_types = field_types_for_record_descriptor(rec._desc) if self.verbose else None

self.count += 1
self.fp.write("--[ RECORD {} ]--\n".format(self.count).encode())
self.fp.write(f"--[ RECORD {self.count} ]--\n".encode())
if rdict:
fmt = "{{:>{width}}} = {{}}\n".format(width=max(len(k) for k in rdict))
if rdict_types:
# also account for extra characters for fieldtype and whitespace + parenthesis
width = max(len(k + rdict_types[k]) for k in rdict) + 3
else:
width = max(len(k) for k in rdict)
fmt = "{{:>{width}}} = {{}}\n".format(width=width)
for key, value in rdict.items():
if rdict_types:
key = f"{key} ({rdict_types[key]})"
self.fp.write(fmt.format(key, value).encode())

def flush(self):
def flush(self) -> None:
if self.fp:
self.fp.flush()

def close(self):
def close(self) -> None:
if self.fp and not is_stdout(self.fp):
self.fp.close()
self.fp = None
2 changes: 1 addition & 1 deletion flow/record/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def get_all_fields(self) -> Mapping[str, RecordField]:
"_source": RecordField("_source", "string"),
"_classification": RecordField("_classification", "datetime"),
"_generated": RecordField("_generated", "datetime"),
"_version": RecordField("_version", "vaeint"),
"_version": RecordField("_version", "varint"),
}

Returns:
Expand Down
14 changes: 13 additions & 1 deletion flow/record/tools/rdump.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ def main(argv=None):
output.add_argument("-c", "--count", type=int, help="Exit after COUNT records")
output.add_argument("--skip", metavar="COUNT", type=int, default=0, help="Skip the first COUNT records")
output.add_argument("-w", "--writer", metavar="OUTPUT", default=None, help="Write records to output")
output.add_argument("-m", "--mode", default=None, choices=("csv", "json", "jsonlines", "line"), help="Output mode")
output.add_argument(
"-m", "--mode", default=None, choices=("csv", "json", "jsonlines", "line", "line-verbose"), help="Output mode"
)
output.add_argument(
"--split", metavar="COUNT", default=None, type=int, help="Write record files smaller than COUNT records"
)
Expand Down Expand Up @@ -155,6 +157,15 @@ def main(argv=None):
default=argparse.SUPPRESS,
help="Short for --mode=line",
)
aliases.add_argument(
"-Lv",
"--line-verbose",
action="store_const",
const="line-verbose",
dest="mode",
default=argparse.SUPPRESS,
help="Short for --mode=line-verbose",
)

args = parser.parse_args(argv)

Expand All @@ -176,6 +187,7 @@ def main(argv=None):
"json": "jsonfile://?indent=2&descriptors=false",
"jsonlines": "jsonfile://?descriptors=false",
"line": "line://",
"line-verbose": "line://?verbose=true",
}
uri = mode_to_uri.get(args.mode, uri)
qparams = {
Expand Down
45 changes: 45 additions & 0 deletions tests/test_rdump.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,3 +624,48 @@ def test_flow_record_invalid_tz(tmp_path, capsys):

# restore DISPLAY_TZINFO just in case
flow.record.fieldtypes.DISPLAY_TZINFO = flow_record_tz(default_tz="UTC")


@pytest.mark.parametrize(
"rdump_params",
[
["--mode=line-verbose"],
["--line-verbose"],
["-Lv"],
["-w", "line://?verbose=true"],
["-w", "line://?verbose=1"],
["-w", "line://?verbose=True"],
],
)
def test_rdump_line_verbose(tmp_path, capsys, rdump_params):
TestRecord = RecordDescriptor(
"test/rdump/line_verbose",
[
("datetime", "stamp"),
("bytes", "data"),
("uint32", "counter"),
("string", "foo"),
],
)
record_path = tmp_path / "test.records"

with RecordWriter(record_path) as writer:
writer.write(TestRecord(counter=1))
writer.write(TestRecord(counter=2))
writer.write(TestRecord(counter=3))

from flow.record.adapter.line import field_types_for_record_descriptor

field_types_for_record_descriptor.cache_clear()
assert field_types_for_record_descriptor.cache_info().currsize == 0
rdump.main([str(record_path)] + rdump_params)
assert field_types_for_record_descriptor.cache_info().misses == 1
assert field_types_for_record_descriptor.cache_info().hits == 2
assert field_types_for_record_descriptor.cache_info().currsize == 1

captured = capsys.readouterr()
assert captured.err == ""
assert "stamp (datetime) =" in captured.out
assert "data (bytes) =" in captured.out
assert "counter (uint32) =" in captured.out
assert "foo (string) =" in captured.out