diff --git a/flow/record/adapter/line.py b/flow/record/adapter/line.py index 40350c9..3765fb0 100644 --- a/flow/record/adapter/line.py +++ b/flow/record/adapter/line.py @@ -1,44 +1,81 @@ -from flow.record import open_path_or_stream +from __future__ import annotations + +from functools import lru_cache + +from flow.record import Record, RecordDescriptor, open_path_or_stream from flow.record.adapter import AbstractWriter from flow.record.utils import is_stdout __usage__ = """ Line output format adapter (writer only) --- -Write usage: rdump -w line://[PATH] +Write usage: rdump -w line://[PATH]?verbose=[VERBOSE] [PATH]: path to file. Leave empty or "-" to output to stdout + +Optional arguments: + [VERBOSE]: Also show fieldtype in line output (default: False) """ +@lru_cache(maxsize=1024) +def field_types_for_record_descriptor(desc: RecordDescriptor) -> dict[str, str]: + """Return dictionary of fieldname -> fieldtype for given RecordDescriptor. + + Args: + desc: RecordDescriptor to get fieldtypes for + Returns: + Dictionary of fieldname -> fieldtype + """ + return {fname: fieldset.typename for fname, fieldset in desc.get_all_fields().items()} + + class LineWriter(AbstractWriter): """Prints all fields and values of the Record on a separate line.""" fp = None - def __init__(self, path, fields=None, exclude=None, **kwargs): + def __init__( + self, + path: str, + *, + fields: list[str] | str | None = None, + exclude: list[str] | str | None = None, + verbose: bool = False, + **kwargs, + ): self.fp = open_path_or_stream(path, "wb") self.count = 0 self.fields = fields self.exclude = exclude + self.verbose = verbose if isinstance(self.fields, str): self.fields = self.fields.split(",") if isinstance(self.exclude, str): self.exclude = self.exclude.split(",") - def write(self, rec): + def write(self, rec: Record) -> None: rdict = rec._asdict(fields=self.fields, exclude=self.exclude) + rdict_types = field_types_for_record_descriptor(rec._desc) if self.verbose else None + self.count += 1 - self.fp.write("--[ RECORD {} ]--\n".format(self.count).encode()) + self.fp.write(f"--[ RECORD {self.count} ]--\n".encode()) if rdict: - fmt = "{{:>{width}}} = {{}}\n".format(width=max(len(k) for k in rdict)) + if rdict_types: + # also account for extra characters for fieldtype and whitespace + parenthesis + width = max(len(k + rdict_types[k]) for k in rdict) + 3 + else: + width = max(len(k) for k in rdict) + fmt = "{{:>{width}}} = {{}}\n".format(width=width) for key, value in rdict.items(): + if rdict_types: + key = f"{key} ({rdict_types[key]})" self.fp.write(fmt.format(key, value).encode()) - def flush(self): + def flush(self) -> None: if self.fp: self.fp.flush() - def close(self): + def close(self) -> None: if self.fp and not is_stdout(self.fp): self.fp.close() self.fp = None diff --git a/flow/record/base.py b/flow/record/base.py index 93736c0..cd9688a 100644 --- a/flow/record/base.py +++ b/flow/record/base.py @@ -499,7 +499,7 @@ def get_all_fields(self) -> Mapping[str, RecordField]: "_source": RecordField("_source", "string"), "_classification": RecordField("_classification", "datetime"), "_generated": RecordField("_generated", "datetime"), - "_version": RecordField("_version", "vaeint"), + "_version": RecordField("_version", "varint"), } Returns: diff --git a/flow/record/tools/rdump.py b/flow/record/tools/rdump.py index e12cef7..c97ae22 100644 --- a/flow/record/tools/rdump.py +++ b/flow/record/tools/rdump.py @@ -98,7 +98,9 @@ def main(argv=None): output.add_argument("-c", "--count", type=int, help="Exit after COUNT records") output.add_argument("--skip", metavar="COUNT", type=int, default=0, help="Skip the first COUNT records") output.add_argument("-w", "--writer", metavar="OUTPUT", default=None, help="Write records to output") - output.add_argument("-m", "--mode", default=None, choices=("csv", "json", "jsonlines", "line"), help="Output mode") + output.add_argument( + "-m", "--mode", default=None, choices=("csv", "json", "jsonlines", "line", "line-verbose"), help="Output mode" + ) output.add_argument( "--split", metavar="COUNT", default=None, type=int, help="Write record files smaller than COUNT records" ) @@ -155,6 +157,15 @@ def main(argv=None): default=argparse.SUPPRESS, help="Short for --mode=line", ) + aliases.add_argument( + "-Lv", + "--line-verbose", + action="store_const", + const="line-verbose", + dest="mode", + default=argparse.SUPPRESS, + help="Short for --mode=line-verbose", + ) args = parser.parse_args(argv) @@ -176,6 +187,7 @@ def main(argv=None): "json": "jsonfile://?indent=2&descriptors=false", "jsonlines": "jsonfile://?descriptors=false", "line": "line://", + "line-verbose": "line://?verbose=true", } uri = mode_to_uri.get(args.mode, uri) qparams = { diff --git a/tests/test_rdump.py b/tests/test_rdump.py index 2e547e9..1bcbf5c 100644 --- a/tests/test_rdump.py +++ b/tests/test_rdump.py @@ -624,3 +624,48 @@ def test_flow_record_invalid_tz(tmp_path, capsys): # restore DISPLAY_TZINFO just in case flow.record.fieldtypes.DISPLAY_TZINFO = flow_record_tz(default_tz="UTC") + + +@pytest.mark.parametrize( + "rdump_params", + [ + ["--mode=line-verbose"], + ["--line-verbose"], + ["-Lv"], + ["-w", "line://?verbose=true"], + ["-w", "line://?verbose=1"], + ["-w", "line://?verbose=True"], + ], +) +def test_rdump_line_verbose(tmp_path, capsys, rdump_params): + TestRecord = RecordDescriptor( + "test/rdump/line_verbose", + [ + ("datetime", "stamp"), + ("bytes", "data"), + ("uint32", "counter"), + ("string", "foo"), + ], + ) + record_path = tmp_path / "test.records" + + with RecordWriter(record_path) as writer: + writer.write(TestRecord(counter=1)) + writer.write(TestRecord(counter=2)) + writer.write(TestRecord(counter=3)) + + from flow.record.adapter.line import field_types_for_record_descriptor + + field_types_for_record_descriptor.cache_clear() + assert field_types_for_record_descriptor.cache_info().currsize == 0 + rdump.main([str(record_path)] + rdump_params) + assert field_types_for_record_descriptor.cache_info().misses == 1 + assert field_types_for_record_descriptor.cache_info().hits == 2 + assert field_types_for_record_descriptor.cache_info().currsize == 1 + + captured = capsys.readouterr() + assert captured.err == "" + assert "stamp (datetime) =" in captured.out + assert "data (bytes) =" in captured.out + assert "counter (uint32) =" in captured.out + assert "foo (string) =" in captured.out