From 844219e6ceb49372da97b5205a3915458b47da7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jannis=20Christopher=20K=C3=B6hl?= Date: Fri, 26 Apr 2024 13:53:58 +0200 Subject: [PATCH 1/2] Update generated docs --- output.js | 710 ++++++++++++++++++++++++++++------------------- scripts/index.js | 24 +- yarn.lock | 93 ++++++- 3 files changed, 532 insertions(+), 295 deletions(-) diff --git a/output.js b/output.js index 969a514..cbca4a3 100644 --- a/output.js +++ b/output.js @@ -1,336 +1,478 @@ export const data = [ { - label: "discard", - type: "keyword", - detail: "Discards all incoming events.", - processedHTML: - '

This operator is mainly used to test or benchmark pipelines.

\n

Synopsis

\n
discard\n
\n

Description

\n

The discard operator has a similar effect as to file /dev/null write json,\nbut it immediately discards all events without rendering them as JSON first.

\n

Examples

\n

We can benchmark the following pipeline to see how long it takes to export\neverything.

\n
export | discard\n
', - docLink: "https://docs.tenzir.com/operators/sinks/discard", + "label": "api", + "type": "keyword", + "detail": "Use Tenzir's REST API directly from a pipeline.", + "processedHTML": "

Synopsis

\n
api <endpoint> [<request-body>]\n
\n

Description

\n

The api operator interacts with Tenzir's REST API without needing to spin up a\nweb server, making all APIs accessible from within pipelines.

\n
OpenAPI

Visit Tenzir's REST API specification to see a list of all available\nendpoints.

\n

<endpoint>

\n

The endpoint to request, e.g., /pipeline/list to list all pipelines created\nthrough the /pipeline/create endpoint.

\n

[<request-body>]

\n

A single string containing the JSON request body to send with the request.

\n

Examples

\n

List all running pipelines:

\n
api /pipeline/list\n
\n

Create a new pipeline and start it immediately.

\n
api /pipeline/create '{\"name\": \"Suricata Import\", \"definition\": \"from file /tmp/eve.sock read suricata\", \"autostart\": {\"created\": true}}'\n
", + "docLink": "https://docs.tenzir.com/operators/api" }, - { - label: "import", - type: "keyword", - detail: "Imports events into a Tenzir node. The dual to export.", - processedHTML: - '

Synopsis

\n
import\n
\n

Description

\n

The import operator persists events in a Tenzir node.

\n
Flush to disk

Pipelines ending in the import operator do not wait until all events in the\npipelines are written to disk.

We plan to change this behavior in the near future. Until then, we recommend\nrunning tenzir-ctl flush after importing events to make sure they\'re available\nfor downstream consumption.

\n

Examples

\n

Import Zeek conn logs into a Tenzir node.

\n
from file conn.log read zeek-tsv | import\n
', - docLink: "https://docs.tenzir.com/operators/sinks/import", + { + "label": "apply", + "type": "keyword", + "detail": "Include the pipeline defined in another file.", + "processedHTML": "

Synopsis

\n
apply <file>\n
\n

Description

\n

The apply operator searches for the given file, first in the current\ndirectory, and then in <config>/apply/ for every config directory, for example\n~/.config/tenzir/apply/.

\n

The .tql extension is automatically added to the filename, unless it already\nhas an extension.

", + "docLink": "https://docs.tenzir.com/operators/apply" + }, + { + "label": "batch", + "type": "keyword", + "detail": "The batch operator controls the batch size of events.", + "processedHTML": "
Expert Operator

The batch operator is a lower-level building block that lets users explicitly\ncontrol batching, which otherwise is controlled automatically by Tenzir's\nunderlying pipeline execution engine. Use with caution!

\n

Synopsis

\n
batch [--timeout <duration>] [<limit>]\n
\n

Description

\n

The batch operator takes its input and rewrites it into batches of up to the\ndesired size.

\n

--timeout <duration>

\n

Specifies a maximum latency for events passing through the batch operator. When\nunspecified, an infinite duration is used.

\n

<limit>

\n

An unsigned integer denoting how many events to put into one batch at most.

\n

Defaults to 65536.

\n

Examples

\n

Write exactly one NDJSON object at a time to a Kafka topic.

\n
batch 1 | to kafka -t topic write json -c\n
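\n&#x0D;
&#x0D;
A sketch combining both options described above (the concrete values are illustrative): forward batches of up to 1000 events, but wait at most one second before emitting whatever has accumulated:&#x0D;
&#x0D;
\n&#x0D;
batch --timeout 1s 1000\n&#x0D;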
", + "docLink": "https://docs.tenzir.com/operators/batch" + }, + { + "label": "chart", + "type": "keyword", + "detail": "Add metadata to a schema, necessary for rendering as a chart.", + "processedHTML": "

Synopsis

\n
chart line [-x|--x-axis <fields>] [-y|--y-axis <field>]\nchart area [-x|--x-axis <fields>] [-y|--y-axis <field>]\nchart bar  [-x|--x-axis <fields>] [-y|--y-axis <field>]\nchart pie  [--name <field>] [--value <fields>]\n
\n

Description

\n

The chart operator adds attributes to the schema of the input events\nthat are used to guide rendering of the data as a chart.\nThe operator does no rendering itself.&#x0D;

\n

-x|--x-axis <fields> (line, area, and bar charts only)

\n

Set the field used for the X-axis. Defaults to the first field in the schema.

\n

Values in this field must be strictly increasing\n(sorted in ascending order, without duplicates)\nwhen creating a line or area chart,\nor unique when creating a bar chart.

\n

-y|--y-axis <fields> (line, area, and bar charts only)

\n

Set the fields used for the Y-axis.\nCan either be a single field, or a list of fields spelled with\na list syntax ([field1, field2]).\nDefaults to every field but the first one.

\n

position=<position> (line, area, and bar charts only)

\n

Control how the values are grouped when rendered as a chart.\nPossible values are grouped and stacked.\nDefaults to grouped.

\n

--name <field> (pie chart only)

\n

Set the field used for the names of the segments.\nDefaults to the first field in the schema.

\n

Values in this field must be unique.

\n

--value <fields> (pie chart only)

\n

Set the fields used for the value of a segment.\nCan either be a single field, or multiple fields delimited with commas\n(field1,field2).\nDefaults to every field but the first one.

\n

Examples

\n

Render most common src_ip values in suricata.flow events as a bar chart:

\n
export\n| where #schema == \"suricata.flow\"\n| top src_ip\n/* -x and -y are defaulted to `src_ip` and `count` */\n| chart bar\n
\n

Render historical import throughput statistics as a line chart:

\n
metrics\n| where #schema == \"tenzir.metrics.operator\"\n| where source == true\n| summarize bytes=sum(output.approx_bytes) by timestamp resolution 1s\n| sort timestamp desc\n| chart line -x timestamp -y bytes\n
", + "docLink": "https://docs.tenzir.com/operators/chart" + }, + { + "label": "compress", + "type": "keyword", + "detail": "Compresses a stream of bytes.", + "processedHTML": "

Synopsis

\n
compress [--level=<level>] <codec>\n
\n

Description

\n

The compress operator compresses bytes in a pipeline incrementally with a\nknown codec.

\n

The compress operator is invoked automatically as a part of to\nif the resulting file has a file extension indicating compression.\nThis behavior can be circumvented by using save directly.

\n
Streaming Compression

The operator uses Apache Arrow's compression\nutilities under the hood, and transparently supports\nall options that Apache Arrow supports for streaming compression.

Besides the supported brotli, bz2, gzip, lz4, and zstd, Apache Arrow\nalso ships with codecs for lzo, lz4_raw, lz4_hadoop and snappy, which\nonly support oneshot compression. Support for them is not currently implemented.

\n

--level=<level>

\n

The compression level to use. The supported values depend on the codec used. If\nomitted, the default level for the codec is used.

\n

<codec>

\n

An identifier of the codec to use. Currently supported are brotli, bz2,\ngzip, lz4, and zstd.

\n

Examples

\n

Export all events in a Gzip-compressed NDJSON file:

\n
export\n| write json --compact-output\n| compress gzip\n| save file /tmp/backup.json.gz\n
\n

Recompress a Zstd-compressed file at a higher compression level:

\n
load file in.zst\n| decompress zstd\n| compress --level 18 zstd\n| save file out.zst\n
", + "docLink": "https://docs.tenzir.com/operators/compress" + }, + { + "label": "context", + "type": "keyword", + "detail": "Manages a context.", + "processedHTML": "

Synopsis

\n
context create  <name> <type> [<args>]\ncontext delete  <name>\ncontext update  <name> [<args>]\ncontext reset   <name>\ncontext save    <name>\ncontext load    <name>\ncontext inspect <name>\n
\n

Description

\n

The context operator manages context instances.

\n\n

<name>

\n

The name of the context to create, update, or delete.

\n

<type>

\n

The context type for the new context.

\n

See the list of available context types.

\n

<args>

\n

Context-specific options in the format --key value or --flag.

\n

Examples

\n

Create a lookup table context called feodo:

\n
context create feodo lookup-table\n
\n

Replace all previous data in the context feodo with data from the Feodo\nTracker IP Block List, using the ip_address\nfield as the lookup table key:

\n
from https://feodotracker.abuse.ch/downloads/ipblocklist.json read json --arrays-of-objects\n| context update feodo --clear --key=ip_address\n
\n

Delete the context named feodo:

\n
context delete feodo\n
\n

Inspect all data provided to feodo:

\n
context inspect feodo\n
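\n&#x0D;
&#x0D;
Reset the context named feodo, following the synopsis above (a sketch; the exact effect depends on the context type):&#x0D;
&#x0D;
\n&#x0D;
context reset feodo\n&#x0D;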
", + "docLink": "https://docs.tenzir.com/operators/context" + }, + { + "label": "decapsulate", + "type": "keyword", + "detail": "Decapsulates packet data at link, network, and transport layer.", + "processedHTML": "
Deprecated

This operator will soon be removed in favor of first-class support for functions\nthat can be used in a variety of different operators and contexts.

\n

Synopsis

\n
decapsulate\n
\n

Description

\n

The decapsulate operator processes events of type pcap.packet and\ndecapsulates the packet payload by extracting fields at the link, network, and\ntransport layer. The aim is not completeness, but rather exposing commonly used\nfields for analytics.&#x0D;

\n

The operator only processes events of type pcap.packet and emits events of\ntype tenzir.packet.

\n

VLAN Tags

\n

While decapsulating packets, decapsulate extracts\n802.1Q VLAN tags into the nested\nvlan record, consisting of an outer and inner field for the respective\ntags. The value of the VLAN tag corresponds to the 12-bit VLAN identifier (VID).\nSpecial values include 0 (frame does not carry a VLAN ID) and 0xFFF\n(reserved value; sometimes wildcard match).

\n

Examples

\n

Decapsulate packets from a PCAP file:

\n
from file /tmp/trace.pcap read pcap\n| decapsulate\n
\n

Extract packets as JSON that have the address 6.6.6.6 as source or destination,\nand destination port 5158:

\n
read pcap\n| decapsulate\n| where 6.6.6.6 && dport == 5158\n| write json\n
\n

Query VLAN IDs using vlan.outer and vlan.inner:

\n
read pcap\n| decapsulate\n| where vlan.outer > 0 || vlan.inner in [1, 2, 3]\n
\n

Filter packets by Community\nID:

\n
read pcap\n| decapsulate\n| where community_id == \"1:wCb3OG7yAFWelaUydu0D+125CLM=\"\n
", + "docLink": "https://docs.tenzir.com/operators/decapsulate" + }, + { + "label": "decompress", + "type": "keyword", + "detail": "Decompresses a stream of bytes.", + "processedHTML": "

Synopsis

\n
decompress <codec>\n
\n

Description

\n

The decompress operator decompresses bytes in a pipeline incrementally with a\nknown codec. The operator supports decompressing multiple concatenated streams\nof the same codec transparently.

\n

The decompress operator is invoked automatically as a part of from\nif the source file has a file extension indicating compression.\nThis behavior can be circumvented by using load directly.

\n
Streaming Decompression

The operator uses Apache Arrow's compression\nutilities under the hood, and transparently supports\nall options that Apache Arrow supports for streaming decompression.

Besides the supported brotli, bz2, gzip, lz4, and zstd, Apache Arrow\nalso ships with codecs for lzo, lz4_raw, lz4_hadoop and snappy, which\nonly support oneshot decompression. Support for them is not currently implemented.

\n

<codec>

\n

An identifier of the codec to use. Currently supported are brotli, bz2,\ngzip, lz4, and zstd.

\n

Examples

\n

Import Suricata events from a Zstd-compressed file:

\n
from eve.json.zst\n| import\n\nload file eve.json.zst\n| decompress zstd\n| read suricata\n| import\n
\n

Convert a Zstd-compressed file into an LZ4-compressed file:

\n
from in.zst\n| to out.lz4\n\nload file in.zst\n| decompress zstd\n| compress lz4\n| save file out.lz4\n
", + "docLink": "https://docs.tenzir.com/operators/decompress" + }, + { + "label": "deduplicate", + "type": "keyword", + "detail": "Removes duplicate events based on the values of one or more fields.", + "processedHTML": "

Synopsis

\n
deduplicate [<extractor>...]\n            [--limit <count>] [--distance <count>] [--timeout <duration>]\n
\n

Description

\n

The deduplicate operator removes duplicates from a stream of events, based\non the value of one or more fields.

\n

You have three independent configuration options to customize the operator's\nbehavior:

\n
    \n
  1. Limit: the multiplicity of the events until they are suppressed as\nduplicates. A limit of 1 is equivalent to emission of unique events. A limit\nof N means that events with a unique key (defined by the fields) get\nemitted at most N times. For example, GGGYBYYBGYGB with a limit of 2\nyields GGYBYB.&#x0D;
  2. \n
  3. Distance: The number of events in sequence since the last occurrence of\na unique event. For example, deduplicating a stream GGGYBYYBGYGB with\ndistance 2 yields GYBBGYB.
  4. \n
  5. Timeout: The time that needs to pass until a suppressed event is no\nlonger considered a duplicate. When an event with a suppressed key is seen\nbefore the timeout is reached, the timer resets.&#x0D;
  6. \n
\n

The diagram below illustrates these three options. The different colored boxes\nrefer to events of different schemas.

\n

\"Deduplicate

\n

<extractor>...

\n

A comma-separated list of extractors that identify the fields used for\ndeduplicating. Missing fields are treated as if they had the value null.

\n

Defaults to the entire event.

\n

--limit <count>

\n

The number of duplicates allowed before they are removed.

\n

Defaults to 1.

\n

--distance <count>

\n

Distance between two events that can be considered duplicates. Value of 1\nmeans only adjacent events can be considered duplicates. 0 means infinity.

\n

Defaults to infinity.

\n

--timeout <duration>

\n

The amount of time a specific value is remembered for deduplication. For each\nvalue, the timer is reset every time a match for that value is found.

\n

Defaults to infinity.

\n

Examples

\n

Consider the following data:

\n
{\"foo\": 1, \"bar\": \"a\"}\n{\"foo\": 1, \"bar\": \"a\"}\n{\"foo\": 1, \"bar\": \"a\"}\n{\"foo\": 1, \"bar\": \"b\"}\n{\"foo\": null, \"bar\": \"b\"}\n{\"bar\": \"b\"}\n{\"foo\": null, \"bar\": \"b\"}\n{\"foo\": null, \"bar\": \"b\"}\n
\n

For deduplicate --limit 1, all duplicate events are removed:

\n
{\"foo\": 1, \"bar\": \"a\"}\n{\"foo\": 1, \"bar\": \"b\"}\n{\"foo\": null, \"bar\": \"b\"}\n{\"bar\": \"b\"}\n
\n

If deduplicate bar --limit 1 is used, only the field bar is considered when\ndetermining whether an event is a duplicate:

\n
{\"foo\": 1, \"bar\": \"a\"}\n{\"foo\": 1, \"bar\": \"b\"}\n
\n

And for deduplicate foo --limit 1, only the field foo is considered.\nNote how the missing foo field is treated as if it had the value null,\ni.e., it's not included in the output.&#x0D;

\n
{\"foo\": 1, \"bar\": \"a\"}\n{\"foo\": null, \"bar\": \"b\"}\n
", + "docLink": "https://docs.tenzir.com/operators/deduplicate" + }, + { + "label": "delay", + "type": "keyword", + "detail": "Delays events relative to a given start time, with an optional speedup.", + "processedHTML": "

Synopsis

\n
delay [--start <time>] [--speed <factor>] <field>\n
\n

Description

\n

The delay operator replays a dataflow according to a time field by introducing\nsleeping periods proportional to the inter-arrival times of the events.

\n

With --speed, you can adjust the sleep time of the time series induced by\nfield with a multiplicative factor. This has the effect of making the time\nseries \"faster\" for values greater than 1 and \"slower\" for values less than 1.\nUnless you provide a start time with --start, the operator will anchor the\ntimestamps in field to begin with the current wall clock time, as if you\nprovided --start now.&#x0D;

\n

The diagram below illustrates the effect of applying delay to dataflow. If an\nevent in the stream has a timestamp that precedes the previous event, delay\nemits it instantly. Otherwise delay sleeps the amount of time to reach the\nnext timestamp. As shown in the last illustration, the --speed factor has a\nscaling effect on the inter-arrival times.&#x0D;

\n

\"Delay\"

\n

The options --start and --speed work independently, i.e., you can use them\nseparately or both together.

\n

--start <time>

\n

The timestamp to anchor the time values around.

\n

Defaults to the first non-null timestamp in field.

\n

--speed <factor>&#x0D;

\n

A constant factor by which the inter-arrival time is divided. For example, 2.0\ndecreases the event gaps by a factor of two, resulting in a dataflow that is twice as fast.\nA value of 0.1 creates a dataflow that spans ten times the original time frame.&#x0D;

\n

Defaults to 1.0.

\n

<field>

\n

The name of the field containing the timestamp values.

\n

Examples

\n

Replay the M57 Zeek logs with real-world inter-arrival times from the ts\ncolumn. For example, if event i arrives at time t and i + 1 at time u,\nthen the delay operator will wait time u - t after emitting event i before\nemitting event i + 1. If t > u, then the operator immediately emits event i + 1.&#x0D;

\n\n
from https://storage.googleapis.com/tenzir-datasets/M57/zeek-all.log.zst read zeek-tsv\n| delay ts\n
\n

Replay the M57 Zeek logs at 10 times the original speed. That is, wait (u - t)\n/ 10 between event i and i + 1, assuming u > t.

\n
from https://storage.googleapis.com/tenzir-datasets/M57/zeek-all.log.zst read zeek-tsv\n| delay --speed 10 ts\n
\n

Replay as above, but start delaying only after ts exceeds 2021-11-17T16:35\nand emit all events prior to that timestamp immediately.

\n
from https://storage.googleapis.com/tenzir-datasets/M57/zeek-all.log.zst read zeek-tsv\n| delay --start \"2021-11-17T16:35\" --speed 10 ts\n
\n

Adjust the timestamp to the present, and then start replaying in 2 hours from\nnow:

\n
from https://storage.googleapis.com/tenzir-datasets/M57/zeek-all.log.zst read zeek-tsv\n| timeshift ts\n| delay --start \"in 2 hours\" ts\n
", + "docLink": "https://docs.tenzir.com/operators/delay" + }, + { + "label": "diagnostics", + "type": "keyword", + "detail": "Retrieves diagnostic events from a Tenzir node.", + "processedHTML": "

Synopsis

\n
diagnostics [--live]\n
\n

Description

\n

The diagnostics operator retrieves diagnostic events from a Tenzir\nnode.

\n

--live

\n

Work on all diagnostic events as they are generated in real-time instead of on\ndiagnostic events persisted at a Tenzir node.

\n

Schemas

\n

Tenzir emits diagnostic information with the following schema:

\n

tenzir.diagnostic

\n

Contains detailed information about the diagnostic.

\n

|Field|Type|Description|\n|:-|:-|:-|\n|pipeline_id|string|The ID of the pipeline that created the diagnostic.|\n|run|uint64|The number of the run, starting at 1 for the first run.|\n|timestamp|time|The exact timestamp of the diagnostic creation.|\n|message|string|The diagnostic message.|\n|severity|string|The diagnostic severity.|\n|notes|list<record>|The diagnostic notes. Can be empty.|\n|annotations|list<record>|The diagnostic annotations. Can be empty.|

\n

The record notes has the following schema:

\n

|Field|Type|Description|\n|:-|:-|:-|\n|kind|string|The kind of note, which is note, usage, hint or docs.|\n|message|string|The message of this note.|

\n

The record annotations has the following schema:

\n

|Field|Type|Description|\n|:-|:-|:-|\n|primary|bool|True if the source represents the underlying reason for the diagnostic, false if it is only related to it.|\n|text|string|A message for explanations. Can be empty.|\n|source|string|The character range in the pipeline string that this annotation is associated to.|

\n

Examples

\n

View all diagnostics generated in the past five minutes.

\n
diagnostics\n| where timestamp > 5 minutes ago\n
\n

Only show diagnostics that contain the error severity.

\n
diagnostics\n| where severity == \"error\"\n
", + "docLink": "https://docs.tenzir.com/operators/diagnostics" + }, + { + "label": "discard", + "type": "keyword", + "detail": "Discards all incoming events.", + "processedHTML": "

Synopsis

\n
discard\n
\n

Description

\n

The discard operator has a similar effect as to file /dev/null write json,\nbut it immediately discards all events without first rendering them with a\nprinter.

\n

This operator is mainly used to test or benchmark pipelines.

\n

Examples

\n

Benchmark to see how long it takes to export everything:

\n
export | discard\n
", + "docLink": "https://docs.tenzir.com/operators/discard" + }, + { + "label": "drop", + "type": "keyword", + "detail": "Drops fields from the input.", + "processedHTML": "

Synopsis

\n
drop <extractor>...\n
\n

Description

\n

The drop operator removes all fields matching the provided extractors and\nkeeps all other fields. It is the dual to select.

\n

In relational algebra, drop performs a projection of the complement of the\nprovided arguments.

\n

<extractor>...

\n

A comma-separated list of extractors that identify the fields to remove.

\n

Examples

\n

Remove the fields foo and bar:

\n
drop foo, bar\n
\n

Remove all fields of type ip:

\n
drop :ip\n
", + "docLink": "https://docs.tenzir.com/operators/drop" + }, + { + "label": "enrich", + "type": "keyword", + "detail": "Enriches events with a context.", + "processedHTML": "

Synopsis

\n
enrich <name>          [--field <field...>] [--replace] [--filter] [--separate]\n                       [--yield <field>] [<context-options>]\nenrich <output>=<name> [--field <field...>] [--filter] [--separate]\n                       [--yield <field>] [<context-options>]\n
\n

Description

\n

The enrich operator applies a context, extending input events with a new field\ndefined by the context.

\n

<name>

\n

The name of the context to enrich with.

\n

<output>

\n

The name of the field in which to store the context's enrichment. Defaults to\nthe name of the context.

\n

--field <field...>

\n

A comma-separated list of fields, type extractors, or concepts to match.

\n

--replace

\n

Replace the given fields with their respective context, omitting all\nmeta-information.

\n

--filter

\n

Filter events that do not match the context.

\n

This option is incompatible with --replace.

\n

--separate

\n

When multiple fields are provided, e.g., when using --field :ip to enrich all\nIP address fields, duplicate the event for every provided field and enrich them\nindividually.

\n

When using the option, the context moves from <output>.context.<path...> to\n<output> in the resulting event, with a new field <output>.path containing\nthe enriched path.

\n

--yield <path>

\n

Provide a field into the context object to use as the context instead. If the\nkey does not exist within the context, a null value is used instead.

\n

<context-options>

\n

Optional, context-specific options in the format --key value or --flag.\nRefer to the documentation of the individual contexts for these.

\n

Examples

\n

Apply the lookup-table context feodo to suricata.flow events, using the\ndest_ip field as the field to compare the context key against.

\n
export\n| where #schema == \"suricata.flow\"\n| enrich feodo --field dest_ip\n
\n

To return only events that have a context, use:

\n
export\n| where #schema == \"suricata.flow\"\n| enrich feodo --field dest_ip --filter\n
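\n&#x0D;
&#x0D;
Based on the example above, a sketch of the --replace flag that overwrites the matched field with the context value instead of adding a separate enrichment field:&#x0D;
&#x0D;
\n&#x0D;
export\n| where #schema == \"suricata.flow\"\n| enrich feodo --field dest_ip --replace\n&#x0D;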
", + "docLink": "https://docs.tenzir.com/operators/enrich" + }, + { + "label": "enumerate", + "type": "keyword", + "detail": "Prepend a column with row numbers.", + "processedHTML": "

Synopsis

\n
enumerate [<field>]\n
\n

Description

\n

The enumerate operator prepends a new column with row numbers to the beginning\nof the input record.

\n
Per-schema Counting

The operator counts row numbers per schema. We plan to change this behavior in\nthe future once we have a modifier that toggles \"per-schema-ness\"\nexplicitly.&#x0D;

\n

<field>

\n

Sets the name of the output field.

\n

Defaults to # to avoid conflicts with existing field names.

\n

Examples

\n

Enumerate the input by prepending row numbers:

\n
from file eve.json read suricata | select event_type | enumerate | write json\n
\n
{\"#\": 0, \"event_type\": \"alert\"}\n{\"#\": 0, \"event_type\": \"flow\"}\n{\"#\": 1, \"event_type\": \"flow\"}\n{\"#\": 0, \"event_type\": \"http\"}\n{\"#\": 1, \"event_type\": \"alert\"}\n{\"#\": 1, \"event_type\": \"http\"}\n{\"#\": 2, \"event_type\": \"flow\"}\n{\"#\": 0, \"event_type\": \"fileinfo\"}\n{\"#\": 3, \"event_type\": \"flow\"}\n{\"#\": 4, \"event_type\": \"flow\"}\n
\n

Use index as field name instead of the default:

\n
enumerate index\n
", + "docLink": "https://docs.tenzir.com/operators/enumerate" + }, + { + "label": "export", + "type": "keyword", + "detail": "Retrieves events from a Tenzir node. The dual to import.", + "processedHTML": "

Synopsis

\n
export [--live] [--internal] [--low-priority]\n
\n

Description

\n

The export operator retrieves events from a Tenzir node.

\n

--live

\n

Work on all events that are imported with import operators in real-time\ninstead of on events persisted at a Tenzir node.

\n

--internal

\n

Export internal events, such as metrics or diagnostics, instead. By default,\nexport only returns events that were previously imported with import. In\ncontrast, export --internal exports internal events such as operator metrics.

\n

--low-priority

\n

Treat this export with a lower priority, causing it to interfere less with\nregular priority exports at the cost of potentially running slower.

\n

Examples

\n

Expose all persisted events as JSON data.

\n
export | to stdout\n
\n

Apply a filter to all persisted events, then only expose the first\nten results.

\n
export | where 1.2.3.4 | head 10 | to stdout\n
", + "docLink": "https://docs.tenzir.com/operators/export" + }, + { + "label": "extend", + "type": "keyword", + "detail": "Appends fields to events.", + "processedHTML": "

Synopsis

\n
extend <field=operand>...\n
\n

Description

\n

The extend operator appends a specified list of fields to the input. All\nexisting fields remain intact.

\n

The difference between extend and put is that put drops all\nfields not explicitly specified, whereas extend only appends fields.

\n

The difference between extend and replace is that replace\noverwrites existing fields, whereas extend doesn't touch the input.

\n

The difference between extend and set is that set does not\nignore fields that already exist in the data.&#x0D;

\n

<field=operand>

\n

The assignment consists of field that describes the new field name and\noperand that defines the field value.

\n

Examples

\n

Add new fields with fixed values:

\n
extend secret=\"xxx\", ints=[1, 2, 3], strs=[\"a\", \"b\", \"c\"]\n
\n

Duplicate a column:

\n
extend source=src_ip\n
", + "docLink": "https://docs.tenzir.com/operators/extend" + }, + { + "label": "files", + "type": "keyword", + "detail": "Shows file information for a given directory.", + "processedHTML": "

Synopsis

\n
files [<directory>] [-r|--recurse-directories]\n                    [--follow-directory-symlink]\n                    [--skip-permission-denied]\n
\n

Description

\n

The files operator shows file information for all files in the given\ndirectory.

\n

<directory>

\n

The directory to list files in.

\n

Defaults to the current working directory.

\n

-r|--recurse-directories

\n

Recursively list files in subdirectories.

\n

--follow-directory-symlink

\n

Follow rather than skip directory symlinks.

\n

--skip-permission-denied

\n

Skip directories that would otherwise result in permission denied errors.

\n

Schemas

\n

Tenzir emits file information with the following schema.

\n

tenzir.file

\n

Contains detailed information about the file.

\n

| Field | Type | Description |\n| :---------------- | :------- | :--------------------------------------- |\n| path | string | The file path. |\n| type | string | The type of the file (see below). |\n| permissions | record | The permissions of the file (see below). |\n| owner | string | The file's owner. |\n| group | string | The file's group. |\n| file_size | uint64 | The file size in bytes. |\n| hard_link_count | uint64 | The number of hard links to the file. |\n| last_write_time | time | The time of the last write to the file. |

\n

The type field can have one of the following values:

\n

| Value | Description |\n| :---------- | :------------------------------ |\n| regular | The file is a regular file. |\n| directory | The file is a directory. |\n| symlink | The file is a symbolic link. |\n| block | The file is a block device. |\n| character | The file is a character device. |\n| fifo | The file is a named IPC pipe. |\n| socket | The file is a named IPC socket. |\n| not_found | The file does not exist. |\n| unknown | The file has an unknown type. |

\n

The permissions record contains the following fields:

\n

| Field | Type | Description |\n| :------- | :------- | :---------------------------------- |\n| owner | record | The file permissions for the owner. |\n| group | record | The file permissions for the group. |\n| others | record | The file permissions for others. |

\n

The owner, group, and others records contain the following fields:

\n

| Field | Type | Description |\n| :-------- | :----- | :------------------------------ |\n| read | bool | Whether the file is readable. |\n| write | bool | Whether the file is writeable. |\n| execute | bool | Whether the file is executable. |

\n

Examples

\n

Compute the total file size of the current directory:

\n
files -r\n| summarize total_size=sum(file_size)\n
\n

Find all named pipes in /tmp:

\n
files -r --skip-permission-denied /tmp\n| where type == \"fifo\"\n&#x0D;
", + "docLink": "https://docs.tenzir.com/operators/files" + }, + { + "label": "flatten", + "type": "keyword", + "detail": "Flattens nested data.", + "processedHTML": "

Synopsis

\n
flatten [<separator>]\n
\n

Description

\n

The flatten operator acts on container types:

\n
    \n
  1. Records: Join nested records with a separator (. by default). For\nexample, if a field named x is a record with fields a and b, flattening\nwill lift the nested record into the parent scope by creating two new fields\nx.a and x.b.
  2. \n
  3. Lists: Merge nested lists into a single (flat) list. For example,\n[[[2]], [[3, 1]], [[4]]] becomes [2, 3, 1, 4].
  4. \n
\n

For records inside lists, flatten \"pushes lists down\" into one list per record\nfield. For example, the record

\n
{\n  \"foo\": [\n    {\n      \"a\": 2,\n      \"b\": 1\n    },\n    {\n      \"a\": 4\n    }\n  ]\n}\n
\n

becomes

\n
{\n  \"foo.a\": [2, 4],\n  \"foo.b\": [1, null]\n}\n
\n

Lists nested in records that are nested in lists will also be flattened. For\nexample, the record

\n
{\n  \"foo\": [\n    {\n      \"a\": [\n        [2, 23],\n        [1,16]\n      ],\n      \"b\": [1]\n    },\n    {\n      \"a\": [[4]]\n    }\n  ]\n}\n
\n

becomes

\n
{\n  \"foo.a\": [\n    2,\n    23,\n    1,\n    16,\n    4\n  ],\n  \"foo.b\": [\n    1\n  ]\n}\n
\n

As you can see from the above examples, flattening also removes null values.

\n

<separator>

\n

The separator string to join the field names of nested records.

\n

Defaults to ..

\n

Examples

\n

Consider the following record:

\n
{\n  \"src_ip\": \"147.32.84.165\",\n  \"src_port\": 1141,\n  \"dest_ip\": \"147.32.80.9\",\n  \"dest_port\": 53,\n  \"event_type\": \"dns\",\n  \"dns\": {\n    \"type\": \"query\",\n    \"id\": 553,\n    \"rrname\": \"irc.freenode.net\",\n    \"rrtype\": \"A\",\n    \"tx_id\": 0,\n    \"grouped\": {\n      \"A\": [\"tenzir.com\", null]\n    }\n  }\n}\n
\n

After flatten the record looks as follows:

\n
{\n  \"src_ip\": \"147.32.84.165\",\n  \"src_port\": 1141,\n  \"dest_ip\": \"147.32.80.9\",\n  \"dest_port\": 53,\n  \"event_type\": \"dns\",\n  \"dns.type\": \"query\",\n  \"dns.id\": 553,\n  \"dns.rrname\": \"irc.freenode.net\",\n  \"dns.rrtype\": \"A\",\n  \"dns.tx_id\": 0,\n  \"dns.grouped.A\": [\"tenzir.com\"]\n}\n
\n

Note that dns.grouped.A no longer contains a null value.

", + "docLink": "https://docs.tenzir.com/operators/flatten" + }, + { + "label": "fluent-bit", + "type": "keyword", + "detail": "Sends and receives events via Fluent Bit.", + "processedHTML": "

Synopsis

\n
fluent-bit [-X|--set <key=value>,...] <plugin> [<key=value>...]\n
\n

Description

\n

The fluent-bit operator acts as a bridge into the Fluent Bit ecosystem,\nmaking it possible to acquire events from a Fluent Bit input plugin\nand process events with a Fluent Bit output plugin.

\n

Syntactically, the fluent-bit operator behaves similarly to an invocation of the\nfluent-bit command line utility. For example, the invocation&#x0D;

\n
fluent-bit -o plugin -p key1=value1 -p key2=value2 -p ...\n
\n

translates to our fluent-bit operator as follows:

\n
fluent-bit plugin key1=value1 key2=value2 ...\n
\n

-X|--set <key=value>

\n

A comma-separated list of key-value pairs that represent the global properties\nof the Fluent Bit service, e.g., -X flush=1,grace=3.&#x0D;

\n

Consult the list of available key-value pairs to configure\nFluent Bit according to your needs.

\n

We recommend factoring these options into the plugin-specific fluent-bit.yaml\nso that they are independent of the fluent-bit operator arguments.

\n

<plugin>

\n

The name of the Fluent Bit plugin.

\n

Run fluent-bit -h and look under the Inputs and Outputs section of the\nhelp text for available plugin names. The web documentation often comes with an\nexample invocation near the bottom of the page, which also provides a good idea\nof how you could use the operator.&#x0D;

\n

<key=value>

\n

Sets a plugin configuration property.

\n

The positional arguments of the form key=value are equivalent to the\nmulti-option -p key=value of the fluent-bit executable.

\n

Examples

\n

Source

\n

Ingest OpenTelemetry\nlogs, metrics, and traces:

\n
fluent-bit opentelemetry\n
\n

You can then send JSON-encoded log data to a freshly created API endpoint:

\n
curl \\\n  --header \"Content-Type: application/json\" \\\n  --request POST \\\n  --data '{\"resourceLogs\":[{\"resource\":{},\"scopeLogs\":[{\"scope\":{},\"logRecords\":[{\"timeUnixNano\":\"1660296023390371588\",\"body\":{\"stringValue\":\"{\\\"message\\\":\\\"dummy\\\"}\"},\"traceId\":\"\",\"spanId\":\"\"}]}]}]}' \\\n  http://0.0.0.0:4318/v1/logs\n
\n

Handle Splunk HTTP\nHEC requests:

\n
fluent-bit splunk port=8088\n
\n

Handle ElasticSearch &\nOpenSearch\nBulk API requests or ingest from beats (e.g., Filebeat, Metricbeat, Winlogbeat):

\n
fluent-bit elasticsearch port=9200\n
\n

Sink

\n

Send events to Slack:

\n
fluent-bit slack webhook=https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX\n
\n

Send events to\nSplunk:

\n
fluent-bit splunk host=127.0.0.1 port=8088 tls=on tls.verify=off splunk_token=11111111-2222-3333-4444-555555555555\n
\n

Send events to\nElasticSearch:

\n
fluent-bit es host=192.168.2.3 port=9200 index=my_index type=my_type\n
", + "docLink": "https://docs.tenzir.com/operators/fluent-bit" + }, + { + "label": "from", + "type": "keyword", + "detail": "Produces events by combining a \\[connector]\\[connectors] and a \\[format]\\[formats].", + "processedHTML": "

Synopsis

\n
from <url> [read <format>]\nfrom <path> [read <format>]\nfrom <connector> [read <format>]\n
\n

Description

\n

The from operator produces events at the beginning of a pipeline by bringing\ntogether a connector and a format.

\n

If given something that looks like a path to a file, the connector can pick\nout a format automatically based on the file extension or the file name.\nThis enables a shorter syntax, e.g., from https://example.com/file.yml\nuses the yaml format. All connectors also have a default format,\nwhich will be used if the format can't be determined by the path.\nFor most connectors, this default format is json. So, for example,\nfrom stdin uses the json format.

\n

Additionally, if a file extension indicating compression can be found,\ndecompress is automatically used.\nFor example, from myfile.json.gz is automatically gzip-decompressed\nand parsed as json, i.e., load myfile.json.gz | decompress gzip | read json.

\n

The from operator is a pipeline under the hood. For most cases, it is equal to\nload <connector> | read <format>. However, for some combinations of\nconnectors and formats the underlying pipeline is a lot more complex. We\nrecommend always using from ... read ... over the load and\nread operators.

\n

<connector>

\n

The connector used to load bytes.

\n

Some connectors have connector-specific options. Please refer to the\ndocumentation of the individual connectors for more information.

\n

<format>

\n

The format used to parse events from the loaded bytes.

\n

Some formats have format-specific options. Please refer to the documentation of\nthe individual formats for more information.

\n

Examples

\n

Read bytes from stdin and parse them as JSON.

\n
from stdin read json\nfrom file stdin read json\nfrom file - read json\nfrom - read json\n
\n

Read bytes from the file path/to/eve.json and parse them as Suricata.\nNote that the file connector automatically assigns the Suricata parser for\neve.json files when no other parser is specified.\nAlso, when directly passed a filesystem path, the file connector is automatically used.

\n
from path/to/eve.json\nfrom file path/to/eve.json\nfrom file path/to/eve.json read suricata\n
\n

Read bytes from the URL https://example.com/data.json over HTTPS and parse them as JSON.\nNote that when from is passed a URL directly, the https connector is automatically used.

\n
from https://example.com/data.json read json\nfrom https example.com/data.json read json\n
", + "docLink": "https://docs.tenzir.com/operators/from" + }, + { + "label": "hash", + "type": "keyword", + "detail": "Computes a SHA256 hash digest of a given field.", + "processedHTML": "
Deprecated

This operator will soon be removed in favor of first-class support for functions\nthat can be used in a variety of different operators and contexts.

\n

Synopsis

\n
hash [-s|--salt=<string>] <field>\n
\n

Description

\n

The hash operator calculates a hash digest of a given field.

\n

-s|--salt=<string>&#x0D;

\n

A salt value for the hash.

\n

<field>

\n

The field over which the hash is computed.

\n

Examples

\n

Hash all values of the field username using the salt value \"B3IwnumKPEJDAA4u\" and store\nthe digest in a new field username_hashed:&#x0D;

\n
hash --salt=\"B3IwnumKPEJDAA4u\" username\n
", + "docLink": "https://docs.tenzir.com/operators/hash" + }, + { + "label": "head", + "type": "keyword", + "detail": "Limits the input to the first N events.", + "processedHTML": "

Synopsis

\n
head [<limit>]\n
\n

Description

\n

The semantics of the head operator are the same as those of the equivalent Unix tool:\nprocess a fixed number of events from the input. The operator terminates\nafter it has reached its limit.&#x0D;

\n

head <limit> is a shorthand notation for slice --end <limit>.

\n

<limit>

\n

An unsigned integer denoting how many events to keep.

\n

Defaults to 10.

\n

Examples

\n

Get the first ten events:

\n
head\n
\n

Get the first five events:

\n
head 5\n
", + "docLink": "https://docs.tenzir.com/operators/head" + }, + { + "label": "import", + "type": "keyword", + "detail": "Imports events into a Tenzir node. The dual to export.", + "processedHTML": "

Synopsis

\n
import\n
\n

Description

\n

The import operator persists events in a Tenzir node.

\n

Examples

\n

Import Zeek conn logs into a Tenzir node.

\n
from file conn.log read zeek-tsv | import\n
", + "docLink": "https://docs.tenzir.com/operators/import" + }, + { + "label": "load", + "type": "keyword", + "detail": "The load operator acquires raw bytes from a connector.", + "processedHTML": "

Synopsis

\n
load <url>\nload <path>\nload <connector>\n
\n

Description

\n

The load operator emits raw bytes.

\n

Notably, it cannot be used together with operators that expect events as input,\nbut rather only with operators that expect bytes, e.g., read or\nsave.

\n

<connector>

\n

The connector used to load bytes.

\n

Some connectors have connector-specific options. Please refer to the\ndocumentation of the individual connectors for more information.

\n

Examples

\n

Read bytes from stdin:

\n
load stdin\n
\n

Read bytes from the URL https://example.com/file.json:

\n
load https://example.com/file.json\nload https example.com/file.json\n
\n

Read bytes from the file path/to/eve.json:

\n
load path/to/eve.json\nload file path/to/eve.json\n
", + "docLink": "https://docs.tenzir.com/operators/load" + }, + { + "label": "lookup", + "type": "keyword", + "detail": "Performs live filtering of the import feed using a context,", + "processedHTML": "

and translates context updates into historical queries.

\n

Synopsis

\n
lookup <context>          [--field <field...>] [--separate]\n                          [--live] [--retro] [--snapshot]\n                          [--yield <field>] [<context-options>]\nlookup <output>=<context> [--field <field...>] [--separate]\n                          [--live] [--retro] [--snapshot]\n                          [--yield <field>] [<context-options>]\n
\n

Description

\n

The lookup operator performs two actions simultaneously:

\n
    \n
  1. Translate context updates into historical queries
  2. \n
  3. Filter all data with a context that gets ingested into a node
  4. \n
\n

These two operations combined offer unified matching, i.e., automated retro\nmatching by turning context updates into historical queries, and live matching\nwith a context on the import feed.

\n

The diagram below illustrates how the operator works:

\n

\"lookup\"

\n

<context>

\n

The name of the context to lookup with.

\n

<output>

\n

The name of the field in which to store the context's enrichment.

\n

Defaults to the name of the context.

\n

--field <field...>

\n

A comma-separated list of fields, type extractors, or concepts to match.

\n

--separate

\n

When multiple fields are provided, e.g., when using --field :ip to enrich all\nIP address fields, duplicate the event for every provided field and enrich them\nindividually.

\n

When using the option, the context moves from <output>.context.<path...> to\n<output> in the resulting event, with a new field <output>.path containing\nthe enriched path.

\n

--live

\n

Enables live lookup for incoming events.

\n

By default, both retro and live lookups are enabled. Specifying either --retro\nor --live explicitly disables the other.

\n

--retro

\n

Enables retrospective lookups for previously imported events. The lookup\noperator will then apply a context after a context update.

\n

By default, both retro and live lookups are enabled.\nSpecifying either --retro or --live explicitly disables\nthe other.

\n

--snapshot

\n

Creates a snapshot of the context at the time of execution. In combination with\n--retro, this will commence a retrospective lookup with that current context\nstate.

\n

By default, snapshotting is disabled. Not all contexts support this operation.

\n

--yield <path>

\n

Provide a field into the context object to use as the context instead. If the\nkey does not exist within the context, a null value is used instead.

\n

<context-options>

\n

Optional, context-specific options in the format --key value or --flag.\nRefer to the documentation of the individual contexts for these.

\n

Examples

\n

Apply the context feodo to incoming suricata.flow events.

\n
lookup --live feodo --field src_ip\n| where #schema == \"suricata.flow\"\n
\n

Apply the context feodo to historical suricata.flow events with every update\nto feodo.

\n
lookup --retro feodo --field src_ip\n| where #schema == \"suricata.flow\"\n
\n

Apply the context feodo to incoming suricata.flow events, and also apply the\ncontext after an update to feodo.

\n
lookup feodo --field src_ip\n| where #schema == \"suricata.flow\"\n
", + "docLink": "https://docs.tenzir.com/operators/lookup" + }, + { + "label": "measure", + "type": "keyword", + "detail": "Replaces the input with metrics describing the input.", + "processedHTML": "

Synopsis

\n
measure [--real-time] [--cumulative]\n
\n

Description

\n

The measure operator yields metrics for each received batch of events or bytes\nusing the following schema, respectively:

\n
type tenzir.metrics.events = record  {\n  timestamp: time,\n  schema: string,\n  schema_id: string,\n  events: uint64,\n}\n
\n
type tenzir.metrics.bytes = record  {\n  timestamp: time,\n  bytes: uint64,\n}\n
\n

--real-time

\n

Emit metrics immediately with every batch, rather than buffering until the\nupstream operator stalls, i.e., is idle or waiting for further input.

\n

The --real-time option is useful when measure should emit data without\nlatency.&#x0D;

\n

--cumulative

\n

Emit running totals for the events and bytes fields rather than per-batch\nstatistics.

\n

Examples

\n

Get the number of bytes read incrementally for a file:

\n
{\"timestamp\": \"2023-04-28T10:22:10.192322\", \"bytes\": 16384}\n{\"timestamp\": \"2023-04-28T10:22:10.223612\", \"bytes\": 16384}\n{\"timestamp\": \"2023-04-28T10:22:10.297169\", \"bytes\": 16384}\n{\"timestamp\": \"2023-04-28T10:22:10.387172\", \"bytes\": 16384}\n{\"timestamp\": \"2023-04-28T10:22:10.408171\", \"bytes\": 8232}\n
\n

Get the number of events read incrementally from a file:

\n
{\"timestamp\": \"2023-04-28T10:26:45.159885\", \"events\": 65536, \"schema\": \"suricata.dns\", \"schema_id\": \"d49102998baae44a\"}\n{\"timestamp\": \"2023-04-28T10:26:45.812321\", \"events\": 412, \"schema\": \"suricata.dns\", \"schema_id\": \"d49102998baae44a\"}\n
\n

Get the total number of events in a file, grouped by schema:

\n
{\"events\": 65948, \"schema\": \"suricata.dns\"}\n
", + "docLink": "https://docs.tenzir.com/operators/measure" + }, + { + "label": "metrics", + "type": "keyword", + "detail": "Retrieves metrics events from a Tenzir node.", + "processedHTML": "

Synopsis

\n
metrics [--live]\n
\n

Description

\n

The metrics operator retrieves metrics events from a Tenzir node. Metrics\nevents are collected every second.

\n

--live

\n

Work on all metrics events as they are generated in real-time instead of on\nmetrics events persisted at a Tenzir node.

\n

Schemas

\n

Tenzir collects metrics with the following schemas.

\n

tenzir.metrics.cpu

\n

Contains a measurement of CPU utilization.

\n

|Field|Type|Description|\n|:-|:-|:-|\n|loadavg_1m|double|The load average over the last minute.|\n|loadavg_5m|double|The load average over the last 5 minutes.|\n|loadavg_15m|double|The load average over the last 15 minutes.|

\n

tenzir.metrics.disk

\n

Contains a measurement of disk space usage.

\n

|Field|Type|Description|\n|:-|:-|:-|\n|path|string|The byte measurements below refer to the filesystem on which this path is located.|\n|total_bytes|uint64|The total size of the volume, in bytes.|\n|used_bytes|uint64|The number of bytes occupied on the volume.|\n|free_bytes|uint64|The number of bytes still free on the volume.|

\n

tenzir.metrics.memory

\n

Contains a measurement of the available memory on the host.

\n

|Field|Type|Description|\n|:-|:-|:-|\n|total_bytes|uint64|The total available memory, in bytes.|\n|used_bytes|uint64|The amount of memory used, in bytes.|\n|free_bytes|uint64|The amount of free memory, in bytes.|

\n

tenzir.metrics.operator

\n

Contains input and output measurements over some amount of time for a single\noperator instantiation.

\n

|Field|Type|Description|\n|:-|:-|:-|\n|pipeline_id|string|The ID of the pipeline where the associated operator is from.|\n|run|uint64|The number of the run, starting at 1 for the first run.|\n|hidden|bool|True if the pipeline is running for the explorer.|\n|operator_id|uint64|The ID of the operator inside the pipeline referenced above.|\n|source|bool|True if this is the first operator in the pipeline.|\n|transformation|bool|True if this is neither the first nor the last operator.|\n|sink|bool|True if this is the last operator in the pipeline.|\n|internal|bool|True if the data flow is considered internal to Tenzir.|\n|timestamp|time|The time when this event was emitted (immediately after the collection period).|\n|duration|duration|The timespan over which this data was collected.|\n|starting_duration|duration|The time spent to start the operator.|\n|processing_duration|duration|The time spent processing the data.|\n|scheduled_duration|duration|The time that the operator was scheduled.|\n|running_duration|duration|The time that the operator was running.|\n|paused_duration|duration|The time that the operator was paused.|\n|input|record|Measurement of the incoming data stream.|\n|output|record|Measurement of the outgoing data stream.|&#x0D;

\n

The records input and output have the following schema:

\n

|Field|Type|Description|\n|:-|:-|:-|\n|unit|string|The type of the elements, which is void, bytes or events.|\n|elements|uint64|Number of elements that were seen during the collection period.|\n|approx_bytes|uint64|An approximation for the number of bytes transmitted.|

\n

tenzir.metrics.process

\n

Contains a measurement of the amount of memory used by the tenzir-node process.

\n

|Field|Type|Description|\n|:-|:-|:-|\n|current_memory_usage|uint64|The memory currently used by this process.|\n|peak_memory_usage|uint64|The peak amount of memory, in bytes.|\n|swap_space_usage|uint64|The amount of swap space, in bytes. Only available on Linux systems.|

\n

Examples

\n

Show the CPU usage over the last hour:

\n
metrics\n| where #schema == \"tenzir.metrics.cpu\"\n| where timestamp > 1 hour ago\n| put timestamp, percent=loadavg_1m\n
\n<details>\n<summary>Output</summary>\n
{\n  \"timestamp\": \"2023-12-21T12:00:32.631102\",\n  \"percent\": 0.40478515625\n}\n{\n  \"timestamp\": \"2023-12-21T11:59:32.626043\",\n  \"percent\": 0.357421875\n}\n{\n  \"timestamp\": \"2023-12-21T11:58:32.620327\",\n  \"percent\": 0.42578125\n}\n{\n  \"timestamp\": \"2023-12-21T11:57:32.614810\",\n  \"percent\": 0.50390625\n}\n{\n  \"timestamp\": \"2023-12-21T11:56:32.609896\",\n  \"percent\": 0.32080078125\n}\n{\n  \"timestamp\": \"2023-12-21T11:55:32.605871\",\n  \"percent\": 0.5458984375\n}\n
\n</details>\n

Get the current memory usage:

\n
metrics\n| where #schema == \"tenzir.metrics.memory\"\n| sort timestamp desc\n| tail 1\n| put current_memory_usage\n
\n<details>\n<summary>Output</summary>\n
{\n  \"current_memory_usage\": 1083031552\n}\n
\n</details>\n

Show the total pipeline ingress in bytes for every day over the last week,\nexcluding pipelines run in the Explorer:

\n
metrics\n| where #schema == \"tenzir.metrics.operator\"\n| where timestamp > 1 week ago\n| where hidden == false and source == true\n| summarize bytes=sum(output.approx_bytes) by timestamp resolution 1 day\n
\n<details>\n<summary>Output</summary>\n
{\n  \"timestamp\": \"2023-11-08T00:00:00.000000\",\n  \"bytes\": 79927223\n}\n{\n  \"timestamp\": \"2023-11-09T00:00:00.000000\",\n  \"bytes\": 51788928\n}\n{\n  \"timestamp\": \"2023-11-10T00:00:00.000000\",\n  \"bytes\": 80740352\n}\n{\n  \"timestamp\": \"2023-11-11T00:00:00.000000\",\n  \"bytes\": 75497472\n}\n{\n  \"timestamp\": \"2023-11-12T00:00:00.000000\",\n  \"bytes\": 55497472\n}\n{\n  \"timestamp\": \"2023-11-13T00:00:00.000000\",\n  \"bytes\": 76546048\n}\n{\n  \"timestamp\": \"2023-11-14T00:00:00.000000\",\n  \"bytes\": 68643200\n}\n
\n</details>\n

Show the three operator instantiations that produced the most events in total\nand their pipeline IDs:

\n
metrics\n| where #schema == \"tenzir.metrics.operator\"\n| where output.unit == \"events\"\n| summarize events=max(output.elements) by pipeline_id, operator_id\n| sort events desc\n| head 3\n
\n<details>\n<summary>Output</summary>\n
{\n  \"pipeline_id\": \"70a25089-b16c-448d-9492-af5566789b99\",\n  \"operator_id\": 0,\n  \"events\": 391008694\n}\n{\n  \"pipeline_id\": \"7842733c-06d6-4713-9b80-e20944927207\",\n  \"operator_id\": 0,\n  \"events\": 246914949\n}\n{\n  \"pipeline_id\": \"6df003be-0841-45ad-8be0-56ff4b7c19ef\",\n  \"operator_id\": 1,\n  \"events\": 83013294\n}\n
\n</details>\n

Get the disk usage over time:

\n
metrics\n| where #schema == \"tenzir.metrics.disk\"\n| sort timestamp\n| put timestamp, used_bytes\n
\n<details>\n<summary>Output</summary>\n
{\n  \"timestamp\": \"2023-12-21T12:52:32.900086\",\n  \"used_bytes\": 461834444800\n}\n{\n  \"timestamp\": \"2023-12-21T12:53:32.905548\",\n  \"used_bytes\": 461834584064\n}\n{\n  \"timestamp\": \"2023-12-21T12:54:32.910918\",\n  \"used_bytes\": 461840302080\n}\n{\n  \"timestamp\": \"2023-12-21T12:55:32.916200\",\n  \"used_bytes\": 461842751488\n}\n
\n</details>\n

Get the memory usage over time:

\n
metrics\n| where #schema == \"tenzir.metrics.memory\"\n| sort timestamp\n| put timestamp, used_bytes\n
\n<details>\n<summary>Output</summary>\n
{\n  \"timestamp\": \"2023-12-21T13:08:32.982083\",\n  \"used_bytes\": 48572645376\n}\n{\n  \"timestamp\": \"2023-12-21T13:09:32.986962\",\n  \"used_bytes\": 48380682240\n}\n{\n  \"timestamp\": \"2023-12-21T13:10:32.992494\",\n  \"used_bytes\": 48438878208\n}\n{\n  \"timestamp\": \"2023-12-21T13:11:32.997889\",\n  \"used_bytes\": 48491839488\n}\n{\n  \"timestamp\": \"2023-12-21T13:12:33.003323\",\n  \"used_bytes\": 48529952768\n}\n
\n</details>", + "docLink": "https://docs.tenzir.com/operators/metrics" + }, + { + "label": "nics", + "type": "keyword", + "detail": "Shows a snapshot of available network interfaces.", + "processedHTML": "

Synopsis

\n
nics\n
\n

Description

\n

The nics operator shows a snapshot of all available network interfaces.

\n

Schemas

\n

Tenzir emits network interface card information with the following schema.

\n

tenzir.nic

\n

Contains detailed information about the network interface.

\n

|Field|Type|Description|\n|:-|:-|:-|\n|name|string|The name of the network interface.|\n|description|string|A brief note or explanation about the network interface.|\n|addresses|list|A list of IP addresses assigned to the network interface.|\n|loopback|bool|Indicates if the network interface is a loopback interface.|\n|up|bool|Indicates if the network interface is up and can transmit data.|\n|running|bool|Indicates if the network interface is running and operational.|\n|wireless|bool|Indicates if the network interface is a wireless interface.|\n|status|record|A record containing detailed status information about the network interface.|

\n

The record status has the following schema:

\n

|Field|Type|Description|\n|:-|:-|:-|\n|unknown|bool|Indicates if the network interface status is unknown.|\n|connected|bool|Indicates if the network interface is connected.|\n|disconnected|bool|Indicates if the network interface is disconnected.|\n|not_applicable|bool|Indicates if the network interface is not applicable.|

\n

Examples

\n

List all connected network interfaces.

\n
nics\n| where status.connected == true\n
", + "docLink": "https://docs.tenzir.com/operators/nics" + }, + { + "label": "openapi", + "type": "keyword", + "detail": "Shows the node's OpenAPI specification.", + "processedHTML": "

Synopsis

\n
openapi\n
\n

Description

\n

The openapi operator shows the current Tenzir node's OpenAPI\nspecification for all available REST endpoint plugins.

\n

Examples

\n

Render the OpenAPI specification as YAML:

\n
openapi | write yaml\n
", + "docLink": "https://docs.tenzir.com/operators/openapi" + }, + { + "label": "parse", + "type": "keyword", + "detail": "Applies a parser to the string stored in a given field.", + "processedHTML": "

Synopsis

\n
parse <input> <parser> <args>...\n
\n

Description

\n

The parse operator parses a given <input> field of type string using\n<parser> and replaces this field with the result. <parser> can be one of the\nparsers in formats.

\n

Examples

\n

Parse CEF from the Syslog messages stored in test.log,\nreturning only the result from the CEF parser.&#x0D;

\n
from test.log read syslog | parse content cef | yield content\n
", + "docLink": "https://docs.tenzir.com/operators/parse" + }, + { + "label": "pass", + "type": "keyword", + "detail": "Does nothing with the input.", + "processedHTML": "

Synopsis

\n
pass\n
\n

Description

\n

The pass operator relays the input without any modification. It exists\nprimarily for testing and debugging.

\n

You can think of pass as the \"identity\" operator.

\n

Examples

\n

Forward the input without any changes:

\n
pass\n
", + "docLink": "https://docs.tenzir.com/operators/pass" + }, + { + "label": "processes", + "type": "keyword", + "detail": "Shows a snapshot of running processes.", + "processedHTML": "

Synopsis

\n
processes\n
\n

Description

\n

The processes operator shows a snapshot of all currently running processes.

\n

Schemas

\n

Tenzir emits process information with the following schema.

\n

tenzir.process

\n

Contains detailed information about the process.

\n

|Field|Type|Description|\n|:-|:-|:-|\n|name|string|The process name.|\n|command_line|list<string>|The command line of the process.|\n|pid|uint64|The process identifier.|\n|ppid|uint64|The parent process identifier.|\n|uid|uint64|The user identifier of the process owner.|\n|gid|uint64|The group identifier of the process owner.|\n|ruid|uint64|The real user identifier of the process owner.|\n|rgid|uint64|The real group identifier of the process owner.|\n|priority|string|The priority level of the process.|\n|startup|time|The time when the process was started.|\n|vsize|uint64|The virtual memory size of the process.|\n|rsize|uint64|The resident set size (physical memory used) of the process.|\n|swap|uint64|The amount of swap memory used by the process.|\n|peak_mem|uint64|Peak memory usage of the process.|\n|open_fds|uint64|The number of open file descriptors by the process.|\n|utime|duration|The user CPU time consumed by the process.|\n|stime|duration|The system CPU time consumed by the process.|

\n

Examples

\n

Show running processes sorted by how long they've been running:

\n
processes\n| sort startup desc\n
\n

Show the top five running processes by name:

\n
processes\n| top name\n| head 5\n
", + "docLink": "https://docs.tenzir.com/operators/processes" }, - { - label: "save", - type: "keyword", - detail: "The save operator acquires raw bytes from a connector.", - processedHTML: - '
Expert Operator

The save operator is a lower-level building block of the to\noperator. Only use this if you need to operate on raw bytes.

\n

Synopsis

\n
save <connector>\n
\n

Description

\n

The save operator operates on raw bytes.

\n

Notably, it cannot be used after an operator that emits events, but rather only\nwith operators that emit bytes, e.g., write or\nload.

\n

<connector>

\n

The connector used to save bytes.

\n

Some connectors have connector-specific options. Please refer to the\ndocumentation of the individual connectors for more information.

\n

Examples

\n

Write bytes to stdout:

\n
save stdout\n
\n

Write bytes to the file path/to/eve.json:

\n
save file path/to/eve.json\n
', - docLink: "https://docs.tenzir.com/operators/sinks/save", + { + "label": "pseudonymize", + "type": "keyword", + "detail": "Pseudonymizes fields according to a given method.", + "processedHTML": "
Deprecated

This operator will soon be removed in favor of first-class support for functions\nthat can be used in a variety of different operators and contexts.

\n

Synopsis

\n
pseudonymize -m|--method=<string> -s|--seed=<seed> <extractor>...\n
\n

Description

\n

The pseudonymize operator replaces IP addresses using the\nCrypto-PAn algorithm.

\n

Currently, pseudonymize exclusively works for fields of type ip.

\n

-m|--method=<string>

\n

The algorithm used for pseudonymization.

\n

-s|--seed=<seed>

\n

A 64-byte seed that describes a hexadecimal value. When the seed is shorter than\n64 bytes, the operator will append zeros to match the size; when it is longer,\nit will truncate the seed.

\n

<extractor>...

\n

The list of extractors describing fields to pseudonymize. If an extractor\nmatches types other than IP addresses, the operator will ignore them.

\n

Example

\n

Pseudonymize all values of the fields src_ip and dest_ip using the\ncrypto-pan algorithm and deadbeef seed:

\n
pseudonymize --method=\"crypto-pan\" --seed=\"deadbeef\" src_ip, dest_ip\n
", + "docLink": "https://docs.tenzir.com/operators/pseudonymize" }, - { - label: "serve", - type: "keyword", - detail: "Make events available under the \\[/serve REST API", - processedHTML: - '

endpoint](/api#/paths/~1serve/post).

\n

Synopsis

\n
serve [--buffer-size <buffer-size>] <serve-id>\n
\n

Description

\n

The serve operator bridges between pipelines and the corresponding /serve\nREST API endpoint.

\n

Pipelines ending with the serve operator exit when all events have been\ndelivered over the corresponding endpoint.

\n

--buffer-size <buffer-size>

\n

The buffer size specifies the maximum number of events to keep in the serve\noperator to make them instantly available in the corresponding endpoint before\nthrottling the pipeline execution.

\n

Defaults to 64Ki.

\n

<serve-id>

\n

The serve id is an identifier that uniquely identifies the operator. The serve\noperator errors when receiving a duplicate serve id.

\n

Examples

\n

Read a Zeek conn.log, 100 events at a time:

\n
tenzir \'from file path/to/conn.log read zeek-tsv | serve zeek-conn-logs\'\n
\n
curl \\\n  -X POST \\\n  -H "Content-Type: application/json" \\\n  -d \'{"serve_id": "zeek-conn-logs", "continuation_token": null, "timeout": "1s", "max_events": 100}\' \\\n  http://localhost:5160/api/v0/serve\n
\n

This will return up to 100 events, or fewer if the specified timeout of 1 second\nexpired.

\n

Subsequent results for further events must specify a continuation token. The\ntoken is included in the response under next_continuation_token if there are\nfurther events to be retrieved from the endpoint.

', - docLink: "https://docs.tenzir.com/operators/sinks/serve", + { + "label": "publish", + "type": "keyword", + "detail": "Publishes events to a channel with a topic. The dual to", + "processedHTML": "

subscribe.

\n

Synopsis

\n
publish [<topic>]\n
\n

Description

\n

The publish operator publishes events at a node in a channel with the\nspecified topic. Any number of subscribers using the subscribe\noperator receive the events immediately.

\n

<topic>

\n

An optional topic for publishing events under. The provided topic must be\nunique.

\n

Defaults to the empty string.
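
\n

Since publish is the dual to subscribe, a matching consumer pipeline might look like the following sketch (the topic name mirrors the example below):

\n
subscribe zeek-conn | import\n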

\n

Examples

\n

Publish Zeek conn logs under the topic zeek-conn.

\n
from file conn.log read zeek-tsv | publish zeek-conn\n
", + "docLink": "https://docs.tenzir.com/operators/publish" }, - { - label: "to", - type: "keyword", - detail: - "Consumes events by combining a \\[connector]\\[connectors] and a \\[format]\\[formats].", - processedHTML: - '

Synopsis

\n
to <connector> [write <format>]\n
\n

Description

\n

The to operator consumes events at the end of a pipeline by bringing together\na connector and a format.

\n

All connectors have a default format, which depends on the connector. This enables\na shorter syntax, e.g., to stdout uses the json format, while to file foo.csv\nuses the csv format.

\n

The to operator is a pipeline under the hood. For most cases, it is equal to\nwrite <format> | save <connector>. However, for some combinations of\nconnectors and formats the underlying pipeline is a bit more complex. We\nrecommend always using to ... write ... over the\nwrite and save operators.

\n

<connector>

\n

The connector used to save bytes.

\n

Some connectors have connector-specific options. Please refer to the\ndocumentation of the individual connectors for more information.

\n

<format>

\n

The format used to print events to bytes.

\n

Some formats have format-specific options. Please refer to the documentation of\nthe individual formats for more information.

\n

Examples

\n

Write events to stdout formatted as CSV.

\n
to stdout write csv\n
\n

Write events to the file path/to/eve.json formatted as JSON.

\n
to file path/to/eve.json write json\n
', - docLink: "https://docs.tenzir.com/operators/sinks/to", - }, - { - label: "export", - type: "keyword", - detail: "Retrieves events from a Tenzir node. The dual to import.", - processedHTML: - '

Synopsis

\n
export\n
\n

Description

\n

The export operator retrieves events from a Tenzir node.

\n
Flush to disk

Pipelines starting with the export operator do not access events that are not\nwritten to disk.

We recommend running tenzir-ctl flush before exporting events to make sure\nthey\'re available for downstream consumption.

\n

Examples

\n

Expose all persisted events as JSON data.

\n
export | to stdout\n
\n

Apply a filter to all persisted events, then\nonly expose the first ten results.

\n
export | where 1.2.3.4 | head 10 | to stdout\n
', - docLink: "https://docs.tenzir.com/operators/sources/export", - }, - { - label: "from", - type: "keyword", - detail: - "Produces events by combining a \\[connector]\\[connectors] and a \\[format]\\[formats].", - processedHTML: - '

Synopsis

\n
from <connector> [read <format>]\n
\n

Description

\n

The from operator produces events at the beginning of a pipeline by bringing\ntogether a connector and a format.

\n

All connectors have a default format. This enables a shorter syntax, e.g.,\nfrom stdin uses the json format, while from file foo.csv uses the csv\nformat.

\n

The from operator is a pipeline under the hood. For most cases, it is equal to\nload <connector> | read <format>. However, for some combinations of\nconnectors and formats the underlying pipeline is a lot more complex. We\nrecommend always using from ... read ... over the load and\nread operators.

\n

<connector>

\n

The connector used to load bytes.

\n

Some connectors have connector-specific options. Please refer to the\ndocumentation of the individual connectors for more information.

\n

<format>

\n

The format used to parse events from the loaded bytes.

\n

Some formats have format-specific options. Please refer to the documentation of\nthe individual formats for more information.

\n

Examples

\n

Read bytes from stdin and parse them as JSON.

\n
from stdin read json\nfrom file stdin read json\nfrom file - read json\nfrom - read json\n
\n

Read bytes from the file path/to/eve.json and parse them as Suricata.\nNote that the file connector automatically assigns the Suricata parser for\neve.json files when no other parser is specified.

\n
from file path/to/eve.json\nfrom file path/to/eve.json read suricata\n
', - docLink: "https://docs.tenzir.com/operators/sources/from", - }, - { - label: "load", - type: "keyword", - detail: "The load operator acquires raw bytes from a connector.", - processedHTML: - '
Expert Operator

The load operator is a lower-level building block of the from\noperator. Only use this if you need to operate on raw bytes.

\n

Synopsis

\n
load <connector>\n
\n

Description

\n

The load operator emits raw bytes.

\n

Notably, it cannot be used together with operators that expect events as input,\nbut rather only with operators that expect bytes, e.g.,\nread or save.

\n

<connector>

\n

The connector used to load bytes.

\n

Some connectors have connector-specific options. Please refer to the\ndocumentation of the individual connectors for more information.

\n

Examples

\n

Read bytes from stdin:

\n
load stdin\n
\n

Read bytes from the file path/to/eve.json:

\n
load file path/to/eve.json\n
', - docLink: "https://docs.tenzir.com/operators/sources/load", - }, - { - label: "shell", - type: "keyword", - detail: "Executes a system command and hooks its stdout into the pipeline.", - processedHTML: - '

Synopsis

\n
shell <command>\n
\n

Description

\n

Refer to shell as transformation for usage\ninstructions.

\n

The difference from the transformation is that the source operator receives its\nstandard input from the terminal in which tenzir was executed. If the\npipeline was not spawned by a terminal, no standard input is provided.

\n

Examples

\n

Show a live log from the tenzir-node service:

\n
shell "journalctl -u tenzir-node -f | read json"\n
', - docLink: "https://docs.tenzir.com/operators/sources/shell", - }, - { - label: "show", - type: "keyword", - detail: "Returns meta information about Tenzir and nodes.", - processedHTML: - '
Experimental

This operator is experimental and subject to change without notice, even in\nminor or patch releases.

\n

Synopsis

\n
show <aspect> [options]\n
\n

Description

\n

The show operator offers introspection capabilities to look at various\naspects of Tenzir.

\n

<aspect>

\n

Describes the part of Tenzir to look at.

\n

Available aspects:

\n\n

Examples

\n

Show all available connectors and formats:

\n
show connectors\nshow formats\n
\n

Show all transformations:

\n
show operators | where transformation == true\n
\n

Show all fields and partitions at a node:

\n
show fields\nshow partitions\n
\n

Show the version of a remote node:

\n
remote show version\n
', - docLink: "https://docs.tenzir.com/operators/sources/show", - }, - { - label: "batch", - type: "keyword", - detail: "The batch operator controls the batch size of events.", - processedHTML: - '
Expert Operator

The batch operator is a lower-level building block that lets users explicitly\ncontrol batching, which otherwise is controlled automatically by Tenzir\'s\nunderlying pipeline execution engine. Use with caution!

\n

Synopsis

\n
batch [<limit>]\n
\n

Description

\n

The batch operator takes its input and rewrites it into batches of up to the\ndesired size.

\n

<limit>

\n

An unsigned integer denoting how many events to put into one batch at most.

\n

Defaults to 65536.

\n

Examples

\n

Write exactly one NDJSON object at a time to a Kafka topic.

\n
batch 1 | to kafka -t topic write json -c\n
', - docLink: "https://docs.tenzir.com/operators/transformations/batch", - }, - { - label: "compress", - type: "keyword", - detail: "Compresses a stream of bytes.", - processedHTML: - '

Synopsis

\n
compress [--level=<level>] <codec>\n
\n

Description

\n

The compress operator compresses bytes in a pipeline incrementally with a\nknown codec.

\n
Streaming Compression

The operator uses Apache Arrow\'s compression\nutilities under the hood, and transparently supports\nall options that Apache Arrow supports for streaming compression.

Besides the supported brotli, bz2, gzip, lz4, and zstd, Apache Arrow\nalso ships with codecs for lzo, lz4_raw, lz4_hadoop and snappy, which\nonly support oneshot compression. Support for them is not currently implemented.

\n

--level=<level>

\n

The compression level to use. The supported values depend on the codec used. If\nomitted, the default level for the codec is used.

\n

<codec>

\n

An identifier of the codec to use. Currently supported are brotli, bz2,\ngzip, lz4, and zstd.

\n

Examples

\n

Export all events in a Gzip-compressed NDJSON file:

\n
export\n| write json --compact-output\n| compress gzip\n| save file /tmp/backup.json.gz\n
\n

Recompress a Zstd-compressed file at a higher compression level:

\n
load file in.zst\n| decompress zstd\n| compress --level 18 zstd\n| save file out.zst\n
', - docLink: "https://docs.tenzir.com/operators/transformations/compress", - }, - { - label: "decapsulate", - type: "keyword", - detail: "Decapsulates packet data at link, network, and transport layer.", - processedHTML: - '
Deprecated

This operator will soon be removed in favor of first-class support for functions\nthat can be used in a variety of different operators and contexts.

\n

Synopsis

\n
decapsulate\n
\n

Description

\n

The decapsulate operator processes events of type pcap.packet and\ndecapsulates the packet payload by extracting fields at the link, network, and\ntransport layer. The aim is not completeness, but rather exposing commonly used\nfields for analytics.

\n

The operator only processes events of type pcap.packet and emits events of\ntype tenzir.packet.

\n

VLAN Tags

\n

While decapsulating packets, decapsulate extracts\n802.1Q VLAN tags into the nested\nvlan record, consisting of an outer and inner field for the respective\ntags. The value of the VLAN tag corresponds to the 12-bit VLAN identifier (VID).\nSpecial values include 0 (frame does not carry a VLAN ID) and 0xFFF\n(reserved value; sometimes wildcard match).

\n

Examples

\n

Decapsulate packets from a PCAP file:

\n
from file /tmp/trace.pcap read pcap\n| decapsulate\n
\n

Extract packets as JSON that have the address 6.6.6.6 as source or destination,\nand destination port 5158:

\n
read pcap\n| decapsulate\n| where 6.6.6.6 && dport == 5158\n| write json\n
\n

Query VLAN IDs using vlan.outer and vlan.inner:

\n
read pcap\n| decapsulate\n| where vlan.outer > 0 || vlan.inner in [1, 2, 3]\n
\n

Filter packets by Community\nID:

\n
read pcap\n| decapsulate\n| where community_id == "1:wCb3OG7yAFWelaUydu0D+125CLM="\n
', - docLink: "https://docs.tenzir.com/operators/transformations/decapsulate", + { + "label": "put", + "type": "keyword", + "detail": "Returns new events that only contain a set of specified fields.", + "processedHTML": "

Synopsis

\n
put <field[=operand]>...\n
\n

Description

\n

The put operator produces new events according to a specified list of fields.\nAll other fields are removed from the input.

\n

The difference between put and extend is that put drops all\nfields not explicitly specified, whereas extend only appends fields.

\n

<field[=operand]>

\n

The field describes the name of the field to select. The extended form with an\noperand assignment allows for computing functions over existing fields.

\n

If the right-hand side of the assignment\nis omitted, the field name is implicitly used as an extractor. If multiple\nfields match the extractor, the first matching field is used in the output. If\nno fields match, null is assigned instead.

\n

Examples

\n

Overwrite values of the field payload with a fixed value:

\n
put payload=\"REDACTED\"\n
\n

Create connection 4-tuples:

\n
put src_ip, src_port, dst_ip, dst_port\n
\n

Unlike select, put reorders fields. If the specified fields\ndo not exist in the input, null values will be assigned.

\n

You can also reference existing fields:

\n
put src_ip, src_port, dst_ip=dest_ip, dst_port=dest_port\n
", + "docLink": "https://docs.tenzir.com/operators/put" }, { - label: "decompress", - type: "keyword", - detail: "Decompresses a stream of bytes.", - processedHTML: - '

Synopsis

\n
decompress <codec>\n
\n

Description

\n

The decompress operator decompresses bytes in a pipeline incrementally with a\nknown codec. The operator supports decompressing multiple concatenated streams\nof the same codec transparently.
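
\n

For instance, a file created by concatenating several gzip archives could be ingested in one pass along these lines (a sketch; the filename is illustrative):

\n
load file concatenated.json.gz\n| decompress gzip\n| read json\n| import\n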

\n
Streaming Decompression

The operator uses Apache Arrow\'s compression\nutilities under the hood, and transparently supports\nall options that Apache Arrow supports for streaming decompression.

Besides the supported brotli, bz2, gzip, lz4, and zstd, Apache Arrow\nalso ships with codecs for lzo, lz4_raw, lz4_hadoop and snappy, which\nonly support oneshot decompression. Support for them is not currently implemented.

\n

<codec>

\n

An identifier of the codec to use. Currently supported are brotli, bz2,\ngzip, lz4, and zstd.

\n

Examples

\n

Import Suricata events from a Zstd-compressed file:

\n
load file eve.json.zst\n| decompress zstd\n| read suricata\n| import\n
\n

Convert a Zstd-compressed file into an LZ4-compressed file:

\n
load file in.zst\n| decompress zstd\n| compress lz4\n| save file out.lz4\n
', - docLink: "https://docs.tenzir.com/operators/transformations/decompress", + "label": "python", + "type": "keyword", + "detail": "Executes Python code against each event of the input.", + "processedHTML": "

Synopsis

\n
python [--requirements <string>] <code>\npython [--requirements <string>] --file <path>\n
\n
Requirements

A Python 3 (>=3.10) interpreter must be present in the PATH environment\nvariable of the tenzir or tenzir-node process.

\n

Description

\n

The python operator executes user-provided Python code against each event of\nthe input.

\n

By default, the Tenzir node executing the pipeline creates a virtual environment\ninto which the tenzir Python package is installed. This behavior can be turned\noff in the node configuration using the plugin.python.create-venvs boolean\noption.

\n
Performance

The python operator implementation applies the provided Python code to each\ninput row one by one. We use\nPyArrow to convert the input\nvalues to native Python data types and back to the Tenzir data model after the\ntransformation.

\n

--requirements <string>

\n

The --requirements flag can be used to pass additional package dependencies in\nthe pip format. When it is used, the argument is passed on to pip install in a\ndedicated virtual environment.

\n

The string is passed verbatim to pip install. To add multiple dependencies,\nseparate them with a space: --requirements \"foo bar\".

\n

<code>

\n

The provided Python code describes an event-for-event transformation, i.e., it\nis executed once for each input event and produces exactly one output event.

\n

An implicitly defined self variable represents the event. Modify it to alter\nthe output of the operator. Fields of the event can be accessed with the dot\nnotation. For example, if the input event contains fields a and b then the\nPython code can access and modify them using self.a and self.b. Similarly,\nnew fields are added by assigning to self.fieldname and existing fields can be\nremoved by deleting them from self. When new fields are added, it is required\nthat the new field has the same type for every row of the event.

\n

--file <path>

\n

Instead of providing the code inline, the --file option allows for passing\na path to a file containing the code the operator executes per event.

\n

Examples

\n

Insert or modify the field x and set it to \"hello, world\":

\n
python 'self.x = \"hello, world\"'\n
\n

Clear the contents of self to remove the implicit input values from the\noutput:

\n
python '\n  self.clear()\n  self.x = 23\n'\n
\n

Define a new field x as the square root of the field y, and remove y from\nthe output:

\n
python '\n  import math\n  self.x = math.sqrt(self.y)\n  del self.y\n'\n
\n

Make use of third party packages:

\n
python --requirements \"requests=^2.30\" '\n  import requests\n  requests.post(\"http://imaginary.api/receive\", data=self)\n'\n
", + "docLink": "https://docs.tenzir.com/operators/python" }, { - label: "drop", - type: "keyword", - detail: "Drops fields from the input.", - processedHTML: - '

Synopsis

\n
drop <extractor>...\n
\n

Description

\n

The drop operator removes all fields matching the provided extractors and\nkeeps all other fields. It is the dual to select.

\n

In relational algebra, drop performs a projection of the complement of the\nprovided arguments.

\n

<extractor>...

\n

A comma-separated list of extractors that identify the fields to remove.

\n

Examples

\n

Remove the fields foo and bar:

\n
drop foo, bar\n
\n

Remove all fields of type ip:

\n
drop :ip\n
', - docLink: "https://docs.tenzir.com/operators/transformations/drop", + "label": "rare", + "type": "keyword", + "detail": "Shows the least common values. The dual to top.", + "processedHTML": "

Synopsis

\n
rare <field> [--count-field=<count-field>|-c <count-field>]\n
\n

Description

\n

Shows the least common values for a given field. For each unique value, a new event containing its count will be produced.

\n

<field>

\n

The name of the field to find the least common values for.

\n

--count-field=<count-field>|-c <count-field>

\n

An optional argument specifying the field name of the count field. Defaults to count.

\n

The count field and the value field must have different names.

\n

Examples

\n

Find the least common values for field id.orig_h.

\n
rare id.orig_h\n
\n

Find the least common values for field count and present the value amount in a field amount.

\n
rare count --count-field=amount\n
", + "docLink": "https://docs.tenzir.com/operators/rare" }, { - label: "enumerate", - type: "keyword", - detail: "Prepend a column with row numbers.", - processedHTML: - '

Synopsis

\n
enumerate [<field>]\n
\n

Description

\n

The enumerate operator prepends a new column with row numbers to the beginning\nof the input record.

\n
Per-schema Counting

The operator counts row numbers per schema. We plan to change this behavior in\nthe future once we have a modifier that toggles "per-schema-ness"\nexplicitly.

\n

<field>

\n

Sets the name of the output field.

\n

Defaults to # to avoid conflicts with existing field names.

\n

Examples

\n

Enumerate the input by prepending row numbers:

\n
from file eve.json read suricata | select event_type | enumerate | write json\n
\n
{"#": 0, "event_type": "alert"}\n{"#": 0, "event_type": "flow"}\n{"#": 1, "event_type": "flow"}\n{"#": 0, "event_type": "http"}\n{"#": 1, "event_type": "alert"}\n{"#": 1, "event_type": "http"}\n{"#": 2, "event_type": "flow"}\n{"#": 0, "event_type": "fileinfo"}\n{"#": 3, "event_type": "flow"}\n{"#": 4, "event_type": "flow"}\n
\n

Use index as field name instead of the default:

\n
enumerate index\n
', - docLink: "https://docs.tenzir.com/operators/transformations/enumerate", + "label": "read", + "type": "keyword", + "detail": "The read operator converts raw bytes into events.", + "processedHTML": "

Synopsis

\n
read <format>\n
\n

Description

\n

The read operator parses events by interpreting its input bytes in a given\nformat.

\n

<format>

\n

The format used to convert raw bytes into events.

\n

Some formats have format-specific options. Please refer to the documentation of\nthe individual formats for more information.

\n

Examples

\n

Read the input bytes as Zeek TSV logs:

\n
read zeek-tsv\n
\n

Read the input bytes as Suricata Eve JSON:

\n
read suricata\n
", + "docLink": "https://docs.tenzir.com/operators/read" }, { - label: "extend", - type: "keyword", - detail: "Appends fields to events.", - processedHTML: - '

Synopsis

\n
extend <field=operand>...\n
\n

Description

\n

The extend appends a specified list of fields to the input. All existing\nfields remain intact.

\n

The difference between extend and put is that put drops all\nfields not explicitly specified, whereas extend only appends fields.

\n

The difference between extend and replace is that replace\noverwrites existing fields, whereas extend doesn\'t touch the input.

\n

<field=operand>

\n

The assignment consists of field that describes the new field name and\noperand that defines the field value.

\n

Examples

\n

Add new fields with fixed values:

\n
extend secret="xxx", ints=[1, 2, 3], strs=["a", "b", "c"]\n
\n

Duplicate a column:

\n
extend source=src_ip\n
', - docLink: "https://docs.tenzir.com/operators/transformations/extend", + "label": "rename", + "type": "keyword", + "detail": "Renames fields and types.", + "processedHTML": "

Synopsis

\n
rename <name=extractor>...\n
\n

Description

\n

The rename operator assigns new names to fields or types. Renaming only\nmodifies metadata and is therefore computationally inexpensive. The operator\nhandles nested field extractors as well, but cannot perform field reordering,\ne.g., by hoisting nested fields into the top level.

\n

Renaming only takes place if the provided extractor on the right-hand side of\nthe assignment resolves to a field or type. Otherwise the assignment does\nnothing. If no extractors match, rename degenerates to pass.

\n

<name=extractor>...

\n

An assignment of the form name=extractor renames the field or type identified\nby extractor to name.

\n

Examples

\n

Rename events of type suricata.flow to connection:

\n
rename connection=:suricata.flow\n
\n

Assign new names to the fields src_ip and dest_ip:

\n
rename src=src_ip, dst=dest_ip\n
\n

Give the nested field orig_h nested under the record id the name src_ip:

\n
rename src=id.orig_h\n
\n

Same as above, but consider fields at any nesting hierarchy:

\n
rename src=orig_h\n
", + "docLink": "https://docs.tenzir.com/operators/rename" }, { - label: "flatten", - type: "keyword", - detail: "Flattens nested data.", - processedHTML: - '

Synopsis

\n
flatten [<separator>]\n
\n

Description

\n

The flatten operator acts on container\ntypes:

\n
    \n
  1. Records: Join nested records with a separator (. by default). For\nexample, if a field named x is a record with fields a and b, flattening\nwill lift the nested record into the parent scope by creating two new fields\nx.a and x.b.
  2. Lists: Merge nested lists into a single (flat) list. For example,\n[[[2]], [[3, 1]], [[4]]] becomes [2, 3, 1, 4].
\n

For records inside lists, flatten "pushes lists down" into one list per record\nfield. For example, the record

\n
{\n  "foo": [\n    {\n      "a": 2,\n      "b": 1\n    },\n    {\n      "a": 4\n    }\n  ]\n}\n
\n

becomes

\n
{\n  "foo.a": [2, 4],\n  "foo.b": [1, null]\n}\n
\n

Lists nested in records that are nested in lists will also be flattened. For\nexample, the record

\n
{\n  "foo": [\n    {\n      "a": [\n        [2, 23],\n        [1,16]\n      ],\n      "b": [1]\n    },\n    {\n      "a": [[4]]\n    }\n  ]\n}\n
\n

becomes

\n
{\n  "foo.a": [\n    2,\n    23,\n    1,\n    16,\n    4\n  ],\n  "foo.b": [\n    1\n  ]\n}\n
\n

As you can see from the above examples, flattening also removes null values.

\n

<separator>

\n

The separator string to join the field names of nested records.

\n

Defaults to ..

\n

Examples

\n

Consider the following record:

\n
{\n  "src_ip": "147.32.84.165",\n  "src_port": 1141,\n  "dest_ip": "147.32.80.9",\n  "dest_port": 53,\n  "event_type": "dns",\n  "dns": {\n    "type": "query",\n    "id": 553,\n    "rrname": "irc.freenode.net",\n    "rrtype": "A",\n    "tx_id": 0,\n    "grouped": {\n      "A": ["tenzir.com", null]\n    }\n  }\n}\n
\n

After flatten the record looks as follows:

\n
{\n  "src_ip": "147.32.84.165",\n  "src_port": 1141,\n  "dest_ip": "147.32.80.9",\n  "dest_port": 53,\n  "event_type": "dns",\n  "dns.type": "query",\n  "dns.id": 553,\n  "dns.rrname": "irc.freenode.net",\n  "dns.rrtype": "A",\n  "dns.tx_id": 0,\n  "dns.grouped.A": ["tenzir.com"]\n}\n
\n

Note that dns.grouped.A no longer contains a null value.

', - docLink: "https://docs.tenzir.com/operators/transformations/flatten", + "label": "repeat", + "type": "keyword", + "detail": "Repeats the input a number of times.", + "processedHTML": "

Synopsis

\n
repeat [<repetitions>]\n
\n

Description

\n

The repeat operator relays the input without any modification, and repeats its\ninputs a specified number of times. It is primarily used for testing and when\nworking with generated data.

\n

The repeat operator keeps its input in memory. Avoid using it to repeat large\ndata sets.

\n

<repetitions>

\n

The number of times to repeat the input data.

\n

If not specified, the operator repeats its input indefinitely.

\n

Examples

\n

Given the following events as JSON:

\n
{\"number\": 1, \"text\": \"one\"}\n{\"number\": 2, \"text\": \"two\"}\n
\n

The repeat operator will repeat them indefinitely, in order:

\n
repeat\n
\n
{\"number\": 1, \"text\": \"one\"}\n{\"number\": 2, \"text\": \"two\"}\n{\"number\": 1, \"text\": \"one\"}\n{\"number\": 2, \"text\": \"two\"}\n{\"number\": 1, \"text\": \"one\"}\n{\"number\": 2, \"text\": \"two\"}\n// …\n
\n

To just repeat the first event 5 times, use:

\n
head 1 | repeat 5\n
\n
{\"number\": 1, \"text\": \"one\"}\n{\"number\": 1, \"text\": \"one\"}\n{\"number\": 1, \"text\": \"one\"}\n{\"number\": 1, \"text\": \"one\"}\n{\"number\": 1, \"text\": \"one\"}\n
", + "docLink": "https://docs.tenzir.com/operators/repeat" }, { - label: "hash", - type: "keyword", - detail: "Computes a SHA256 hash digest of a given field.", - processedHTML: - '
Deprecated

This operator will soon be removed in favor of first-class support for functions\nthat can be used in a variety of different operators and contexts.

\n

Synopsis

\n
hash [-s|--salt=<string>] <field>\n
\n

Description

\n

The hash operator calculates a hash digest of a given field.

\n

<-s|--salt>=<string>

\n

A salt value for the hash.

\n

<field>

\n

The field over which the hash is computed.

\n

Examples

\n

Hash all values of the field username using the salt value "B3IwnumKPEJDAA4u" and store\nthe digest in a new field username_hashed:

\n
hash --salt="B3IwnumKPEJDAA4u" username\n
', - docLink: "https://docs.tenzir.com/operators/transformations/hash", + "label": "replace", + "type": "keyword", + "detail": "Replaces the fields matching the given extractors with fixed values.", + "processedHTML": "

Synopsis

\n
replace <extractor=operand>...\n
\n

Description

\n

The replace operator mutates existing fields by providing a new value.

\n

The difference between replace and extend is that replace\noverwrites existing fields, whereas extend doesn't touch the input.

\n

<extractor=operand>

\n

The assignment consists of an extractor that matches against existing fields\nand an operand that defines the new field value.

\n

If field does not exist in the input, the operator degenerates to\npass. Use the set operator to extend fields that cannot\nbe replaced.
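
\n

As a sketch of that approach, the following upserts src_ip: it creates the field when it is missing and overwrites it otherwise:

\n
set src_ip=0.0.0.0\n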

\n

Examples

\n

Replace the field src_ip with a fixed value:

\n
replace src_ip=0.0.0.0\n
\n

Replace all IP addresses with a fixed value:

\n
replace :ip=0.0.0.0\n
", + "docLink": "https://docs.tenzir.com/operators/replace" }, { - label: "head", - type: "keyword", - detail: "Limits the input to the first N events.", - processedHTML: - "

Synopsis

\n
head [<limit>]\n
\n

Description

\n

The semantics of the head operator are the same as those of the equivalent Unix tool:\nprocess a fixed number of events from the input. The operator terminates\nafter it has reached its limit.

\n

<limit>

\n

An unsigned integer denoting how many events to keep.

\n

Defaults to 10.

\n

Examples

\n

Get the first ten events:

\n
head\n
\n

Get the first five events:

\n
head 5\n
", - docLink: "https://docs.tenzir.com/operators/transformations/head", + "label": "save", + "type": "keyword", + "detail": "The save operator saves bytes to a connector.", + "processedHTML": "

Synopsis

\n
save <uri>\nsave <path>\nsave <connector>\n
\n

Description

\n

The save operator operates on raw bytes.

\n

Notably, it cannot be used after an operator that emits events, but rather only\nwith operators that emit bytes, e.g., write or load.

\n

<connector>

\n

The connector used to save bytes.

\n

Some connectors have connector-specific options. Please refer to the\ndocumentation of the individual connectors for more information.

\n

Examples

\n

Write bytes to stdout:

\n
save stdout\n
\n

Write bytes to the file path/to/eve.json:

\n
save path/to/eve.json\nsave file path/to/eve.json\n
", + "docLink": "https://docs.tenzir.com/operators/save" }, { - label: "measure", - type: "keyword", - detail: "Replaces the input with metrics describing the input.", - processedHTML: - '

Synopsis

\n
measure [--real-time] [--cumulative]\n
\n

Description

\n

The measure operator yields metrics for each received batch of events or bytes\nusing the following schema, respectively:

\n
type tenzir.metrics.events = record  {\n  timestamp: time,\n  schema: string,\n  schema_id: string,\n  events: uint64,\n}\n
\n
type tenzir.metrics.bytes = record  {\n  timestamp: time,\n  bytes: uint64,\n}\n
\n

--real-time

\n

Emit metrics immediately with every batch, rather than buffering until the\nupstream operator stalls, i.e., is idle or waiting for further input.

\n

The --real-time option is useful when measure should emit data without\nlatency.

\n

--cumulative

\n

Emit running totals for the events and bytes fields rather than per-batch\nstatistics.

\n

Examples

\n

Get the number of bytes read incrementally for a file:

\n
{"timestamp": "2023-04-28T10:22:10.192322", "bytes": 16384}\n{"timestamp": "2023-04-28T10:22:10.223612", "bytes": 16384}\n{"timestamp": "2023-04-28T10:22:10.297169", "bytes": 16384}\n{"timestamp": "2023-04-28T10:22:10.387172", "bytes": 16384}\n{"timestamp": "2023-04-28T10:22:10.408171", "bytes": 8232}\n
\n
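
Such byte metrics could come from a pipeline along these lines (a sketch, assuming a local file eve.json):

\n
load file eve.json | measure\n
\n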

Get the number of events read incrementally from a file:

\n
{"timestamp": "2023-04-28T10:26:45.159885", "events": 65536, "schema": "suricata.dns", "schema_id": "d49102998baae44a"}\n{"timestamp": "2023-04-28T10:26:45.812321", "events": 412, "schema": "suricata.dns", "schema_id": "d49102998baae44a"}\n
\n

Get the total number of events in a file, grouped by schema:

\n
{"events": 65948, "schema": "suricata.dns"}\n
', - docLink: "https://docs.tenzir.com/operators/transformations/measure", + "label": "select", + "type": "keyword", + "detail": "Selects fields from the input.", + "processedHTML": "

Synopsis

\n
select <extractor>...\n
\n

Description

\n

The select operator keeps only the fields matching the provided extractors and\nremoves all other fields. It is the dual to drop.

\n

In relational algebra, select performs a projection of the provided\narguments.

\n

<extractor>...

\n

A comma-separated list of extractors that identify the fields to keep.

\n

Examples

\n

Only keep fields foo and bar:

\n
select foo, bar\n
\n

Select all fields of type ip:

\n
select :ip\n
", + "docLink": "https://docs.tenzir.com/operators/select" }, { - label: "pass", - type: "keyword", - detail: "Does nothing with the input.", - processedHTML: - '

Synopsis

\n
pass\n
\n

Description

\n

The pass operator relays the input without any modification. It exists\nprimarily for testing and debugging.

\n

You can think of pass as the "identity" operator.

\n

Examples

\n

Forward the input without any changes:

\n
pass\n
', - docLink: "https://docs.tenzir.com/operators/transformations/pass", + "label": "serve", + "type": "keyword", + "detail": "Make events available under the \\[/serve REST API", + "processedHTML": "

endpoint](/api#/paths/~1serve/post).

\n

Synopsis

\n
serve [--buffer-size <buffer-size>] <serve-id>\n
\n

Description

\n

The serve operator bridges between pipelines and the corresponding /serve\nREST API endpoint:

\n

\"Serve

\n

Pipelines ending with the serve operator exit when all events have been\ndelivered over the corresponding endpoint.

\n

--buffer-size <buffer-size>

\n

The buffer size specifies the maximum number of events to keep in the serve\noperator to make them instantly available in the corresponding endpoint before\nthrottling the pipeline execution.

\n

Defaults to 64Ki.
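
\n

For example, to make at most 1,000 events available at once before the pipeline gets throttled, one might write the following sketch (the serve id my-results is illustrative):

\n
serve --buffer-size 1000 my-results\n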

\n

<serve-id>

\n

The serve id is an identifier that uniquely identifies the operator. The serve\noperator errors when receiving a duplicate serve id.

\n

Examples

\n

Read a Zeek conn.log, 100 events at a time:

\n
tenzir 'from file path/to/conn.log read zeek-tsv | serve zeek-conn-logs'\n
\n
curl \\\n  -X POST \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"serve_id\": \"zeek-conn-logs\", \"continuation_token\": null, \"timeout\": \"1s\", \"max_events\": 100}' \\\n  http://localhost:5160/api/v0/serve\n
\n

This will return up to 100 events, or fewer if the specified timeout of 1 second\nexpired.

\n

Subsequent results for further events must specify a continuation token. The\ntoken is included in the response under next_continuation_token if there are\nfurther events to be retrieved from the endpoint.

\n

Wait for an initial event

\n

This pipeline will produce 10 events after 3 seconds of doing nothing.

\n
tenzir \"shell \\\"sleep 3; jq --null-input '{foo: 1}'\\\" | read json | repeat 10 | serve slow-events\"\n
\n
curl \\\n  -X POST \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\"serve_id\": \"slow-events\", \"continuation_token\": null, \"timeout\": \"5s\", \"min_events\": 1}' \\\n  http://localhost:5160/api/v0/serve\n
\n

The call to /serve will wait up to 5 seconds for the first event from the pipeline arriving at the serve operator,\nand return immediately once the first event arrives.

", + "docLink": "https://docs.tenzir.com/operators/serve" }, { - label: "pseudonymize", - type: "keyword", - detail: "Pseudonymizes fields according to a given method.", - processedHTML: - '
Deprecated

This operator will soon be removed in favor of first-class support for functions\nthat can be used in a variety of different operators and contexts.

\n

Synopsis

\n
pseudonymize -m|--method=<string> -s|--seed=<seed> <extractor>...\n
\n

Description

\n

The pseudonymize operator replaces IP addresses using the\nCrypto-PAn algorithm.

\n

Currently, pseudonymize exclusively works for fields of type ip.

\n

-m|--method=<string>

\n

The algorithm used for pseudonymization.

\n

-s|--seed=<seed>

\n

A 64-byte seed that describes a hexadecimal value. When the seed is shorter than\n64 bytes, the operator will append zeros to match the size; when it is longer,\nit will truncate the seed.

\n

<extractor>...

\n

The list of extractors describing fields to pseudonymize. If an extractor\nmatches types other than IP addresses, the operator will ignore them.

\n

Example

\n

Pseudonymize all values of the fields src_ip and dest_ip using the\ncrypto-pan algorithm and deadbeef seed:

\n
pseudonymize --method="crypto-pan" --seed="deadbeef" src_ip, dest_ip\n
', - docLink: "https://docs.tenzir.com/operators/transformations/pseudonymize", + "label": "set", + "type": "keyword", + "detail": "Upserts fields in events.", + "processedHTML": "

Synopsis

\n
set <field=operand>...\n
\n

Description

\n

The set operator sets a list of fields to the given values. It overwrites old\nvalues of fields matching the field expression, or creates new fields of a\ngiven name otherwise.

\n

<field=operand>

\n

The assignment consists of field that describes the new field name and\noperand that defines the field value. If the field name already exists, the\noperator replaces the value of the field.

\n

Examples

\n

Upsert new fields with fixed values:

\n
set secret=\"xxx\", ints=[1, 2, 3], strs=[\"a\", \"b\", \"c\"]\n
\n

Move a column, replacing the old value with null.

\n
set source=src_ip, src_ip=null\n
", + "docLink": "https://docs.tenzir.com/operators/set" }, { - label: "put", - type: "keyword", - detail: "Returns new events that only contain a set of specified fields.", - processedHTML: - '

Synopsis

\n
put <field[=operand]>...\n
\n

Description

\n

The put operator produces new events according to a specified list of fields.\nAll other fields are removed from the input.

\n

The difference between put and extend is that put drops all\nfields not explicitly specified, whereas extend only appends fields.

\n

<field[=operand]>

\n

The field describes the name of the field to select. The extended form with an\noperand assignment allows for computing functions over existing fields.

\n

If the right-hand side of the assignment\nis omitted, the field name is implicitly used as an extractor. If multiple\nfields match the extractor, the first matching field is used in the output. If\nno fields match, null is assigned instead.

\n

Examples

\n

Overwrite values of the field payload with a fixed value:

\n
put payload="REDACTED"\n
\n

Create connection 4-tuples:

\n
put src_ip, src_port, dst_ip, dst_port\n
\n

Unlike select, put reorders fields. If the specified fields\ndo not exist in the input, null values will be assigned.

\n

You can also reference existing fields:

\n
put src_ip, src_port, dst_ip=dest_ip, dst_port=dest_port\n
', - docLink: "https://docs.tenzir.com/operators/transformations/put", + "label": "shell", + "type": "keyword", + "detail": "Executes a system command and hooks its stdin and stdout into the pipeline.", + "processedHTML": "

Synopsis

\n
shell <command>\n
\n

Description

\n

The shell operator executes the provided command by spawning a new process.\nThe input of the operator is forwarded to the child's standard input. Similarly,\nthe child's standard output is forwarded to the output of the operator.

\n

<command>

\n

The command to execute and hook into the pipeline processing.

\n

The value of command is a single string. If you would like to pass a command\nline as you would on the shell, use single or double quotes for escaping, e.g.,\nshell 'jq -C' or shell \"jq -C\". The command is interpreted by /bin/sh -c.

\n

Examples

\n

Show a live log from the tenzir-node service:

\n
shell \"journalctl -u tenzir-node -f | read json\"\n
\n

Consider the use case of converting CSV to JSON:

\n
tenzir 'read csv | write json' | jq -C\n
\n

The write json operator produces NDJSON. Piping this output to jq generates a\ncolored, tree-structured variation that is (arguably) easier to read. Using the\nshell operator, you can integrate Unix tools that rely on\nstdin/stdout for input/output as \"native\" operators that process raw bytes. For\nexample, in this pipeline:

\n
write json | save stdout\n
\n

The write operator produces raw bytes and save\naccepts raw bytes. The shell operator therefore fits right in the middle:

\n
write json | shell \"jq -C\" | save stdout\n
\n

Using user-defined operators, we can\nexpose this (potentially verbose) post-processing more succinctly in the\npipeline language:

\n
tenzir:\n  operators:\n    jsonize:\n      write json | shell \"jq -C\" | save stdout\n
\n

Now you can use jsonize as a custom operator in a pipeline:

\n
tenzir 'read csv | where field > 42 | jsonize' < file.csv\n
\n

This mechanism also allows for wrapping more complex invocations of tools.\nZeek, for example, converts packets into structured network\nlogs. Tenzir already has support for consuming Zeek output with the formats\nzeek-json and\nzeek-tsv. But that requires attaching yourself\ndownstream of a Zeek instance. Sometimes you want instant Zeek analytics given a\nPCAP trace.

\n

With the shell operator, you can script a Zeek invocation and readily\npost-process the output with a rich set of operators, to filter, reshape,\nenrich, or route the logs as structured data. Let's define a zeek operator for\nthat:

\n
tenzir:\n  operators:\n    zeek:\n      shell \"zeek -r - LogAscii::output_to_stdout=T\n             JSONStreaming::disable_default_logs=T\n             JSONStreaming::enable_log_rotation=F\n             json-streaming-logs\"\n      | read zeek-json\n
\n

Processing a PCAP trace now is a matter of calling the zeek operator:

\n
gunzip -c example.pcap.gz |\n  tenzir 'zeek | select id.orig_h, id.orig_p, id.resp_h | head 3'\n
\n
{\"id\": {\"orig_h\": null, \"resp_h\": null, \"resp_p\": null}}\n{\"id\": {\"orig_h\": \"192.168.168.100\", \"resp_h\": \"83.135.95.78\", \"resp_p\": 0}}\n{\"id\": {\"orig_h\": \"192.168.168.100\", \"resp_h\": \"83.135.95.78\", \"resp_p\": 22}}\n
\n

NB: because zeek (= shell) reads bytes, we can drop the implicit load stdin source operator in this pipeline.

", + "docLink": "https://docs.tenzir.com/operators/shell" }, { - label: "rare", - type: "keyword", - detail: "Shows the least common values. The dual to top.", - processedHTML: - "

Synopsis

\n
rare <field> [--count-field=<count-field>|-c <count-field>]\n
\n

Description

\n

Shows the least common values for a given field. For each unique value, a new event containing its count will be produced.

\n

<field>

\n

The name of the field to find the least common values for.

\n

--count-field=<count-field>|-c <count-field>

\n

An optional argument specifying the field name of the count field. Defaults to count.

\n

The count field and the value field must have different names.

\n

Examples

\n

Find the least common values for field id.orig_h.

\n
rare id.orig_h\n
\n

Find the least common values for field count and present the value amount in a field amount.

\n
rare count --count-field=amount\n
", - docLink: "https://docs.tenzir.com/operators/transformations/rare", + "label": "show", + "type": "keyword", + "detail": "Returns information about a Tenzir node.", + "processedHTML": "

Synopsis

\n
show [<aspect>]\n
\n

Description

\n

The show operator offers introspection capabilities to look at various\naspects of a Tenzir node.

\n

<aspect>

\n

Describes the part of Tenzir to look at.

\n

Available aspects:

\n\n

We also offer some additional aspects for experts that want to take a deeper\nlook at what's going on:

\n\n

When no aspect is specified, all are shown.

\n

Examples

\n

Show all available connectors and formats:

\n
show connectors\nshow formats\n
\n

Show all transformations:

\n
show operators | where transformation == true\n
\n

Show all fields and partitions at a node:

\n
show fields\nshow partitions\n
\n

Show all aspects of a node:

\n
show\n
", + "docLink": "https://docs.tenzir.com/operators/show" }, { - label: "read", - type: "keyword", - detail: "The read operator converts raw bytes into events.", - processedHTML: - '

Synopsis

\n
read <format>\n
\n

Description

\n

The read operator parses events by interpreting its input bytes in a given\nformat.

\n

<format>

\n

The format used to convert raw bytes into events.

\n

Some formats have format-specific options. Please refer to the documentation of\nthe individual formats for more information.

\n

Examples

\n

Read the input bytes as Zeek TSV logs:

\n
read zeek-tsv\n
\n

Read the input bytes as Suricata Eve JSON:

\n
read suricata\n
', - docLink: "https://docs.tenzir.com/operators/transformations/read", - }, + "label": "sigma", + "type": "keyword", + "detail": "Filter the input with \\[Sigma rules]\\[sigma] and output matching events.", + "processedHTML": "

Synopsis

\n
sigma <rule> [--refresh-interval <refresh-interval>]\nsigma <directory> [--refresh-interval <refresh-interval>]\n
\n

Description

\n

The sigma operator executes Sigma rules on\nits input. If a rule matches, the operator emits a tenzir.sigma event that\nwraps the input record into a new record along with the matching rule. The\noperator discards all events that do not match the provided rules.

\n

For each rule, the operator transpiles the YAML into an\nexpression and instantiates a\nwhere operator, followed by put to generate an output.\nHere's how the transpilation works. The Sigma rule YAML format requires a\ndetection attribute that includes a map of named sub-expressions called search\nidentifiers. In addition, detection must include a final condition that\ncombines search identifiers using boolean algebra (AND, OR, and NOT) or\nsyntactic sugar to reference groups of search expressions, e.g., using the\n1/all of * or plain wildcard syntax. Consider the following detection\nembedded in a rule:

\n
detection:\n  foo:\n    a: 42\n    b: \"evil\"\n  bar:\n    c: 1.2.3.4\n  condition: foo or not bar\n
\n

We translate this rule by building a symbol table of all keys (foo and\nbar). Each sub-expression is a valid expression in itself:

\n
    \n
  1. foo: a == 42 && b == \"evil\"
  2. bar: c == 1.2.3.4
\n

Finally, we combine the expression according to condition:

\n
(a == 42 && b == \"evil\") || ! (c == 1.2.3.4)\n
\n

We parse the YAML string values according to Tenzir's richer data model, e.g.,\nthe expression c: 1.2.3.4 becomes a field named c and value 1.2.3.4 of\ntype ip, rather than a string. Sigma also comes with its own event\ntaxonomy\nto standardize field names. The sigma operator currently does not normalize\nfields according to this taxonomy but rather takes the field names verbatim from\nthe search identifier.

\n

Sigma uses value\nmodifiers\nto select a concrete relational operator for a given search predicate. Without a\nmodifier, Sigma uses equality comparison (==) of field and value. For example,\nthe contains modifier changes the relational operator to substring search, and\nthe re modifier switches to a regular expression match. The table below shows\nwhat modifiers the sigma operator supports, where ✅ means implemented, 🚧 not\nyet implemented but possible, and ❌ not yet supported:

\n

|Modifier|Use|sigmac|Tenzir|\n|--------|---|:----:|:--:|\n|contains|perform a substring search with the value|✅|✅|\n|startswith|match the value as a prefix|✅|✅|\n|endswith|match the value as a suffix|✅|✅|\n|base64|encode the value with Base64|✅|✅\n|base64offset|encode value as all three possible Base64 variants|✅|✅\n|utf16le/wide|transform the value to UTF16 little endian|✅|🚧\n|utf16be|transform the value to UTF16 big endian|✅|🚧\n|utf16|transform the value to UTF16|✅|🚧\n|re|interpret the value as regular expression|✅|✅\n|cidr|interpret the value as a IP CIDR|❌|✅\n|all|changes the expression logic from OR to AND|✅|✅\n|lt|compare less than (<) the value|❌|✅\n|lte|compare less than or equal to (<=) the value|❌|✅\n|gt|compare greater than (>) the value|❌|✅\n|gte|compare greater than or equal to (>=) the value|❌|✅\n|expand|expand value to placeholder strings, e.g., %something%|❌|❌

\n

<rule.yaml>

\n

The rule to match.

\n

This invocation transpiles rule.yaml at the time of pipeline creation.

\n

<directory>

\n

The directory to watch.

\n

This invocation watches a directory and attempts to parse each contained file as\na Sigma rule. The sigma operator matches if any of the contained rules\nmatch, effectively creating a disjunction of all rules inside the directory.

\n

--refresh-interval <refresh-interval>

\n

How often the Sigma operator looks at the specified rule or directory of rules\nto update its internal state.

\n

Defaults to 5 seconds.
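
\n

For example, the following sketch checks a rule directory for changes only every 30 seconds (the directory path and the 30s duration syntax are illustrative):

\n
sigma /tmp/rules/ --refresh-interval 30s\n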

\n

Examples

\n

Apply a Sigma rule to an EVTX file using\nevtx_dump:

\n
evtx_dump -o jsonl file.evtx | tenzir 'read json | sigma rule.yaml'\n
\n

Apply a Sigma rule over historical data in a node from the last day:

\n
export | where :timestamp > 1 day ago | sigma rule.yaml\n
\n

Watch a directory of Sigma rules and apply all of them on a continuous stream of\nSuricata events:

\n
from file --follow eve.json read suricata | sigma /tmp/rules/\n
\n

When you add a new file to /tmp/rules, the sigma operator transpiles it and\nwill match it on all subsequent inputs.

", + "docLink": "https://docs.tenzir.com/operators/sigma" + }, { - label: "rename", - type: "keyword", - detail: "Renames fields and types.", - processedHTML: - '

Synopsis

\n
rename <name=extractor>...\n
\n

Description

\n

The rename operator assigns new names to fields or types. Renaming only\nmodifies metadata and is therefore computationally inexpensive. The operator\nhandles nested field extractors as well, but cannot perform field reordering,\ne.g., by hoisting nested fields into the top level.

\n

Renaming only takes place if the provided extractor on the right-hand side of\nthe assignment resolves to a field or type. Otherwise the assignment does\nnothing. If no extractors match, rename degenerates to pass.

\n

<name=extractor>...

\n

An assignment of the form name=extractor renames the field or type identified\nby extractor to name.

\n

Examples

\n

Rename events of type suricata.flow to connection:

\n
rename connection=:suricata.flow\n
\n

Assign new names to the fields src_ip and dest_ip:

\n
rename src=src_ip, dst=dest_ip\n
\n

Give the nested field orig_h nested under the record id the name src_ip:

\n
rename src=id.orig_h\n
\n

Same as above, but consider fields at any nesting hierarchy:

\n
rename src=orig_h\n
', - docLink: "https://docs.tenzir.com/operators/transformations/rename", - }, + "label": "slice", + "type": "keyword", + "detail": "Keep a range events within the half-closed interval \\[begin, end).", + "processedHTML": "

Synopsis

\n
slice [--begin <begin>] [--end <end>]\n
\n

Description

\n

The slice operator selects a range of events from the input. The semantics of\nthe operator match Python's array slicing.

\n

<begin>

\n

A signed integer denoting the beginning (inclusive) of the range to keep. Use a\nnegative number to count from the end.

\n

<end>

\n

A signed integer denoting the end (exclusive) of the range to keep. Use a\nnegative number to count from the end.

\n

Examples

\n

Get the second 100 events:

\n
slice --begin 100 --end 200\n
\n

Get the last five events:

\n
slice --begin -5\n
\n

Skip the last ten events:

\n
slice --end -10\n
\n

Return the last 50 events, except for the last 2:

\n
slice --begin -50 --end -2\n
\n

Skip the first and the last event:

\n
slice --begin 1 --end -1\n
", + "docLink": "https://docs.tenzir.com/operators/slice" + }, { - label: "repeat", - type: "keyword", - detail: "Repeats the input a number of times.", - processedHTML: - '

Synopsis

\n
repeat [<repetitions>]\n
\n

Description

\n

The repeat operator relays the input without any modification, and repeats its\ninputs a specified number of times. It is primarily used for testing and when\nworking with generated data.

\n

The repeat operator keeps its input in memory. Avoid using it to repeat large\ndata sets.

\n

<repetitions>

\n

The number of times to repeat the input data.

\n

If not specified, the operator repeats its input indefinitely.

\n

Examples

\n

Given the following events as JSON:

\n
{"number": 1, "text": "one"}\n{"number": 2, "text": "two"}\n
\n

The repeat operator will repeat them indefinitely, in order:

\n
repeat\n
\n
{"number": 1, "text": "one"}\n{"number": 2, "text": "two"}\n{"number": 1, "text": "one"}\n{"number": 2, "text": "two"}\n{"number": 1, "text": "one"}\n{"number": 2, "text": "two"}\n// …\n
\n

To just repeat the first event 5 times, use:

\n
head 1 | repeat 5\n
\n
{"number": 1, "text": "one"}\n{"number": 1, "text": "one"}\n{"number": 1, "text": "one"}\n{"number": 1, "text": "one"}\n{"number": 1, "text": "one"}\n
', - docLink: "https://docs.tenzir.com/operators/transformations/repeat", - }, + "label": "sockets", + "type": "keyword", + "detail": "Shows a snapshot of open sockets.", + "processedHTML": "

Synopsis

\n
sockets\n
\n

Description

\n

The sockets operator shows a snapshot of all currently open sockets.

\n

Schemas

\n

Tenzir emits socket information with the following schema.

\n

tenzir.socket

\n

Contains detailed information about the socket.

\n

|Field|Type|Description|\n|:-|:-|:-|\n|pid|uint64|The process identifier.|\n|process|string|The name of the process involved.|\n|protocol|uint64|The protocol used for the communication.|\n|local_addr|ip|The local IP address involved in the connection.|\n|local_port|port|The local port number involved in the connection.|\n|remote_addr|ip|The remote IP address involved in the connection.|\n|remote_port|port|The remote port number involved in the connection.|\n|state|string|The current state of the connection.|

\n

Examples

\n

Show process ID, local, and remote IP address of all sockets:

\n
sockets\n| select pid, local_addr, remote_addr \n
", + "docLink": "https://docs.tenzir.com/operators/sockets" + }, { - label: "replace", - type: "keyword", - detail: - "Replaces the fields matching the given extractors with fixed values.", - processedHTML: - '

Synopsis

\n
replace <extractor=operand>...\n
\n

Description

\n

The replace operator mutates existing fields by providing a new value.

\n

The difference between replace and extend is that replace\noverwrites existing fields, whereas extend doesn\'t touch the input.

\n

<extractor=operand>

\n

The assignment consists of an extractor that matches against existing fields\nand an operand that defines the new field value.

\n

If field does not exist in the input, the operator degenerates to\npass.

\n

Examples

\n

Replace the field the field src_ip with a fixed value:

\n
replace src_ip=0.0.0.0\n
\n

Replace all IP address with a fixed value:

\n
replace :ip=0.0.0.0\n
', - docLink: "https://docs.tenzir.com/operators/transformations/replace", - }, - { - label: "select", - type: "keyword", - detail: "Selects fields from the input.", - processedHTML: - '

Synopsis

\n
select <extractor>...\n
\n

Description

\n

The select operator keeps only the fields matching the provided extractors and\nremoves all other fields. It is the dual to drop.

\n

In relational algebra, select performs a projection of the provided\narguments.

\n

<extractor>...

\n

A comma-separated list of extractors that identify the fields to keep.

\n

Examples

\n

Only keep fields foo and bar:

\n
select foo, bar\n
\n

Select all fields of type ip:

\n
select :ip\n
', - docLink: "https://docs.tenzir.com/operators/transformations/select", - }, - { - label: "shell", - type: "keyword", - detail: - "Executes a system command and hooks its raw stdin and stdout into the pipeline.", - processedHTML: - '

Synopsis

\n
shell <command>\n
\n

Description

\n

The shell operator executes the provided command by spawning a new process.\nThe input of the operator is forwarded to the child\'s standard input. Similarly,\nthe child\'s standard output is forwarded to the output of the operator.

\n

You can also use shell as source operator.

\n

<command>

\n

The command to execute and hook into the pipeline processing.

\n

The value of command is a single string. If you would like to pass a command\nline as you would on the shell, use single or double quotes for escaping, e.g.,\nshell \'jq -C\' or shell "jq -C". The command is interpreted by /bin/sh -c.

\n

Examples

\n

Consider the use case of converting CSV to JSON:

\n
tenzir \'read csv | write json\' | jq -C\n
\n

The write json operator produces NDJSON. Piping this output to jq generates a\ncolored, tree-structured variation that is (arguably) easier to read. Using the\nshell operator, you can integrate Unix tools that rely on\nstdin/stdout for input/output as "native" operators that process raw bytes. For\nexample, in this pipeline:

\n
write json | save stdout\n
\n

The write operator produces raw bytes and\nsave accepts raw bytes. The shell operator therefore\nfits right in the middle:

\n
write json | shell "jq -C" | save stdout\n
\n

Using user-defined operators, we can expose this\n(potentially verbose) post-processing more succinctly in the pipeline language:

\n
tenzir:\n  operators:\n    jsonize:\n      write json | shell "jq -C" | save stdout\n
\n

Now you can use jsonize as a custom operator in a pipeline:

\n
tenzir \'read csv | where field > 42 | jsonize\' < file.csv\n
\n

This mechanism allows for wrapping also more complex invocation of tools.\nZeek, for example, converts packets into structured network\nlogs. Tenzir already has support for consuming Zeek output with the formats\nzeek-json and\nzeek-tsv. But that requires attaching yourself\ndownstream of a Zeek instance. Sometimes you want instant Zeek analytics given a\nPCAP trace.

\n

With the shell operator, you can script a Zeek invocation and readily\npost-process the output with a rich set of operators, to filter, reshape,\nenrich, or route the logs as structured data. Let\'s define a zeek operator for\nthat:

\n
tenzir:\n  operators:\n    zeek:\n      shell "zeek -r - LogAscii::output_to_stdout=T\n             JSONStreaming::disable_default_logs=T\n             JSONStreaming::enable_log_rotation=F\n             json-streaming-logs"\n      | read zeek-json\n
\n

Processing a PCAP trace now is a matter of calling the zeek operator:

\n
gunzip -c example.pcap.gz |\n  tenzir \'zeek | select id.orig_h, id.orig_p, id.resp_h | head 3\'\n
\n
{"id": {"orig_h": null, "resp_h": null, "resp_p": null}}\n{"id": {"orig_h": "192.168.168.100", "resp_h": "83.135.95.78", "resp_p": 0}}\n{"id": {"orig_h": "192.168.168.100", "resp_h": "83.135.95.78", "resp_p": 22}}\n
\n

NB: because zeek (= shell) reads bytes, we can drop the implicit load stdin source operator in this pipeline.

', - docLink: "https://docs.tenzir.com/operators/transformations/shell", - }, + "label": "sort", + "type": "keyword", + "detail": "Sorts events.", + "processedHTML": "

Synopsis

\n
sort [--stable] <field> [<asc>|<desc>] [<nulls-first>|<nulls-last>]\n
\n

Description

\n

Sorts events by a provided field.

\n

--stable

\n

Preserve the relative order of events that cannot be sorted because the provided\nfields resolve to the same value.

\n

<field>

\n

The name of the field to sort by.

\n

<asc>|<desc>

\n

Specifies the sort order.

\n

Defaults to asc.

\n

<nulls-first>|<nulls-last>

\n

Specifies how to order null values.

\n

Defaults to nulls-last.

\n

Examples

\n

Sort by the timestamp field in ascending order.

\n
sort timestamp\n
\n

Sort by the timestamp field in descending order.

\n
sort timestamp desc\n
\n

Arrange by field foo and put null values first:

\n
sort foo nulls-first\n
\n

Arrange by field foo in descending order and put null values first:

\n
sort foo desc nulls-first\n
", + "docLink": "https://docs.tenzir.com/operators/sort" + }, { - label: "sigma", - type: "keyword", - detail: - "Filter the input with \\[Sigma rules]\\[sigma] and output matching events.", - processedHTML: - '
Experimental

This operator is experimental and subject to change without notice, even in\nminor or patch releases.

\n

Synopsis

\n
sigma <rule> [--refresh-interval <refresh-interval>]\nsigma <directory> [--refresh-interval <refresh-interval>]\n
\n

Description

\n

The sigma operator executes Sigma rules on\nits input. If a rule matches, the operator emits a tenzir.sigma event that\nwraps the input record into a new record along with the matching rule. The\noperator discards all events that do not match the provided rules.

\n

For each rule, the operator transpiles the YAML into an\nexpression and instantiates a\nwhere operator, followed by put to generate an output.\nHere\'s how the transpilation works. The Sigma rule YAML format requires a\ndetection attribute that includes a map of named sub-expression called search\nidentifiers. In addition, detection must include a final condition that\ncombines search identifiers using boolean algebra (AND, OR, and NOT) or\nsyntactic sugar to reference groups of search expressions, e.g., using the\n1/all of * or plain wildcard syntax. Consider the following detection\nembedded in a rule:

\n
detection:\n  foo:\n    a: 42\n    b: "evil"\n  bar:\n    c: 1.2.3.4\n  condition: foo or not bar\n
\n

We translate this rule piece by building a symbol table of all keys (foo and\nbar). Each sub-expression is a valid expression in itself:

\n
    \n
  1. foo: a == 42 && b == "evil"
  2. \n
  3. bar: c == 1.2.3.4
  4. \n
\n

Finally, we combine the expression according to condition:

\n
(a == 42 && b == "evil") || ! (c == 1.2.3.4)\n
\n

We parse the YAML string values according to Tenzir\'s richer data model, e.g.,\nthe expression c: 1.2.3.4 becomes a field named c and value 1.2.3.4 of\ntype ip, rather than a string. Sigma also comes with its own event\ntaxonomy\nto standardize field names. The sigma operator currently does not normalize\nfields according to this taxonomy but rather takes the field names verbatim from\nthe search identifier.

\n

Sigma uses value\nmodifiers\nto select a concrete relational operator for given search predicate. Without a\nmodifier, Sigma uses equality comparison (==) of field and value. For example,\nthe contains modifier changes the relational operator to substring search, and\nthe re modifier switches to a regular expression match. The table below shows\nwhat modifiers the sigma operator supports, where ✅ means implemented, 🚧 not\nyet implemented but possible, and ❌ not yet supported:

\n

|Modifier|Use|sigmac|Tenzir|\n|--------|---|:----:|:--:|\n|contains|perform a substring search with the value|✅|✅|\n|startswith|match the value as a prefix|✅|✅|\n|endswith|match the value as a suffix|✅|✅|\n|base64|encode the value with Base64|✅|✅\n|base64offset|encode value as all three possible Base64 variants|✅|✅\n|utf16le/wide|transform the value to UTF16 little endian|✅|🚧\n|utf16be|transform the value to UTF16 big endian|✅|🚧\n|utf16|transform the value to UTF16|✅|🚧\n|re|interpret the value as regular expression|✅|✅\n|cidr|interpret the value as a IP CIDR|❌|✅\n|all|changes the expression logic from OR to AND|✅|✅\n|lt|compare less than (<) the value|❌|✅\n|lte|compare less than or equal to (<=) the value|❌|✅\n|gt|compare greater than (>) the value|❌|✅\n|gte|compare greater than or equal to (>=) the value|❌|✅\n|expand|expand value to placeholder strings, e.g., %something%|❌|❌

\n

<rule.yaml>

\n

The rule to match.

\n

This invocation transpiles rule.yaml at the time of pipeline creation.

\n

<directory>

\n

The directory to watch.

\n

This invocation watches a directory and attempts to parse each contained file as\na Sigma rule. The sigma operator matches if any of the contained rules\nmatch, effectively creating a disjunction of all rules inside the directory.

\n

--refresh-interval <refresh-interval>

\n

How often the Sigma operator looks at the specified rule or directory of rules\nto update its internal state.

\n

Defaults to 5 seconds.

\n

Examples

\n

Apply a Sigma rule to an EVTX file using\nevtx_dump:

\n
evtx_dump -o jsonl file.evtx | tenzir \'read json | sigma rule.yaml\'\n
\n

Apply a Sigma rule over historical data in a node from the last day:

\n
export | where :timestamp > 1 day ago | sigma rule.yaml\n
\n

Watch a directory of Sigma rules and apply all of them on a continuous stream of\nSuricata events:

\n
from file --follow read suricata | sigma /tmp/rules/\n
\n

When you add a new file to /tmp/rules, the sigma operator transpiles it and\nwill match it on all subsequent inputs.

', - docLink: "https://docs.tenzir.com/operators/transformations/sigma", + "label": "subscribe", + "type": "keyword", + "detail": "Subscribes to events from a channel with a topic. The dual to", + "processedHTML": "

publish.

\n

Synopsis

\n
subscribe [<topic>]\n
\n

Description

\n

The subscribe operator subscribes to events from a channel with the specified\ntopic. Multiple subscribe operators with the same topic receive the same\nevents.

\n

<topic>

\n

An optional topic identifying the channel events are published under.

\n

Examples

\n

Subscribe to the events under the topic zeek-conn:

\n
subscribe zeek-conn\n
", + "docLink": "https://docs.tenzir.com/operators/subscribe" }, { - label: "sort", - type: "keyword", - detail: "Sorts events.", - processedHTML: - '

Synopsis

\n
sort [--stable] <field> [<asc>|<desc>] [<nulls-first>|<nulls-last>]\n
\n

Description

\n

Sorts events by a provided field.

\n
Work in Progress

The implementation of the sort operator currently only works with field names.\nWe plan to support sorting by meta data, and more generally, entire expressions.\nTo date, the operator also lacks support sorting subnet fields.

\n

--stable

\n

Preserve the relative order of events that cannot be sorted because the provided\nfields resolve to the same value.

\n

<field>

\n

The name of the field to sort by.

\n

<asc>|<desc>

\n

Specifies the sort order.

\n

Defaults to asc.

\n

<nulls-first>|<nulls-last>

\n

Specifies how to order null values.

\n

Defaults to nulls-last.

\n

Examples

\n

Sort by the timestamp field in ascending order.

\n
sort timestamp\n
\n

Sort by the timestamp field in descending order.

\n
sort timestamp desc\n
\n

Arrange by field foo and put null values first:

\n
sort foo nulls-first\n
\n

Arrange by field foo in descending order and put null values first:

\n
sort foo desc nulls-first\n
', - docLink: "https://docs.tenzir.com/operators/transformations/sort", + "label": "summarize", + "type": "keyword", + "detail": "Groups events and applies aggregate functions on each group.", + "processedHTML": "

Synopsis

\n
summarize <[field=]aggregation>... [by <extractor>... [resolution <duration>]]\n
\n

Description

\n

The summarize operator groups events according to a grouping expression and\napplies an aggregation function over each group. The operator consumes the\nentire input before producing an output.

\n

Fields that neither occur in an aggregation function nor in the by list\nare dropped from the output.

\n

[field=]aggregation

\n

Aggregation functions compute a single value of one or more columns in a given\ngroup. Syntactically, aggregation has the form f(x) where f is the\naggregation function and x is a field.

\n

By default, the name for the new field aggregation is its string\nrepresentation, e.g., min(timestamp). You can specify a different name by\nprepending a field assignment, e.g., min_ts=min(timestamp).

\n

The following aggregation functions are available:

\n\n

by <extractor>

\n

The extractors specified after the optional by clause partition the input into\ngroups. If by is omitted, all events are assigned to the same group.

\n

resolution <duration>

\n

The resolution option takes an optional duration value that specifies the\ntolerance when comparing time values in the group-by section. For example,\n01:48 is rounded down to 01:00 when a 1-hour resolution is used.

\n

NB: we introduced the resolution option as a stop-gap measure to compensate for\nthe lack of a rounding function. The ability to apply functions in the grouping\nexpression will replace this option in the future.

\n

Examples

\n

Group the input by src_ip and aggregate all unique dest_port values into a\nlist:

\n
summarize distinct(dest_port) by src_ip\n
\n

Same as above, but produce a count of the unique number of values instead of a\nlist:

\n
summarize count_distinct(dest_port) by src_ip\n
\n

Compute the minimum and maximum of the timestamp field per src_ip group:

\n
summarize min(timestamp), max(timestamp) by src_ip\n
\n

Compute the minimum and maximum of the timestamp field over all events:

\n
summarize min(timestamp), max(timestamp)\n
\n

Create a boolean flag originator that is true if any value in the group is\ntrue:

\n
summarize originator=any(is_orig) by src_ip\n
\n

Create 1-hour groups and produce a summary of network traffic between host\npairs:

\n
summarize sum(bytes_in), sum(bytes_out) by ts, src_ip, dest_ip resolution 1 hour\n
", + "docLink": "https://docs.tenzir.com/operators/summarize" }, { - label: "summarize", - type: "keyword", - detail: "Groups events and applies aggregate functions on each group.", - processedHTML: - "

Synopsis

\n
summarize <[field=]aggregation>... [by <extractor>... [resolution <duration>]]\n
\n

Description

\n

The summarize operator groups events according to a grouping expression and\napplies an aggregation function over each group. The operator consumes the\nentire input before producing an output.

\n

Fields that neither occur in an aggregation function nor in the by list\nare dropped from the output.

\n

[field=]aggregation

\n

Aggregation functions compute a single value of one or more columns in a given\ngroup. Syntactically, aggregation has the form f(x) where f is the\naggregation function and x is a field.

\n

By default, the name for the new field aggregation is its string\nrepresentation, e.g., min(timestamp). You can specify a different name by\nprepending a field assignment, e.g., min_ts=min(timestamp).

\n

The following aggregation functions are available:

\n\n

by <extractor>

\n

The extractors specified after the optional by clause partition the input into\ngroups. If by is omitted, all events are assigned to the same group.

\n

resolution <duration>

\n

The resolution option specifies an optional duration value that specifies the\ntolerance when comparing time values in the group-by section. For example,\n01:48 is rounded down to 01:00 when a 1-hour resolution is used.

\n

NB: we introduced the resolution option as a stop-gap measure to compensate for\nthe lack of a rounding function. The ability to apply functions in the grouping\nexpression will replace this option in the future.

\n

Examples

\n

Group the input by src_ip and aggregate all unique dest_port values into a\nlist:

\n
summarize distinct(dest_port) by src_ip\n
\n

Same as above, but produce a count of the unique number of values instead of a\nlist:

\n
summarize count_distinct(dest_port) by src_ip\n
\n

Compute minimum, maximum of the timestamp field per src_ip group:

\n
summarize min(timestamp), max(timestamp) by src_ip\n
\n

Compute minimum, maximum of the timestamp field over all events:

\n
summarize min(timestamp), max(timestamp)\n
\n

Create a boolean flag originator that is true if any value in the group is\ntrue:

\n
summarize originator=any(is_orig) by src_ip\n
\n

Create 1-hour groups and produce a summary of network traffic between host\npairs:

\n
summarize sum(bytes_in), sum(bytes_out) by ts, src_ip, dest_ip resolution 1 hour\n
", - docLink: "https://docs.tenzir.com/operators/transformations/summarize", + "label": "tail", + "type": "keyword", + "detail": "Limits the input to the last N events.", + "processedHTML": "

Synopsis

\n
tail [<limit>]\n
\n

Description

\n

The semantics of the tail operator are the same as those of the equivalent Unix\ntool: consume all input and only display the last N events.

\n

tail <limit> is a shorthand notation for slice --begin -<limit>.

\n

<limit>

\n

An unsigned integer denoting how many events to keep.

\n

Defaults to 10.

\n

Examples

\n

Get the last ten results:

\n
tail\n
\n

Get the last five results:

\n
tail 5\n
", + "docLink": "https://docs.tenzir.com/operators/tail" }, { - label: "tail", - type: "keyword", - detail: "Limits the input to the last N events.", - processedHTML: - "

Synopsis

\n
tail [<limit>]\n
\n

Description

\n

The semantics of the tail operator are the same of the equivalent Unix tool:\nconsume all input and only display the last N events.

\n

<limit>

\n

An unsigned integer denoting how many events to keep. Defaults to 10.

\n

Defaults to 10.

\n

Examples

\n

Get the last ten results:

\n
tail\n
\n

Get the last five results:

\n
tail 5\n
", - docLink: "https://docs.tenzir.com/operators/transformations/tail", + "label": "taste", + "type": "keyword", + "detail": "Limits the input to N events per unique schema.", + "processedHTML": "

Synopsis

\n
taste [<limit>]\n
\n

Description

\n

The taste operator provides an exemplary overview of the \"shape\" of the data\ndescribed by the pipeline. This helps to understand the diversity of the\nresult, especially when interactively exploring data. Usually, the first N\nevents are returned, but this is not guaranteed.

\n

<limit>

\n

An unsigned integer denoting how many events to keep per schema.

\n

Defaults to 10.

\n

Examples

\n

Get 10 results of each unique schema:

\n
taste\n
\n

Get one sample for every unique event type:

\n
taste 1\n
", + "docLink": "https://docs.tenzir.com/operators/taste" }, { - label: "taste", - type: "keyword", - detail: "Limits the input to N events per unique schema.", - processedHTML: - '

Synopsis

\n
taste [<limit>]\n
\n

Description

\n

The taste operator provides an exemplary overview of the "shape" of the data\ndescribed by the pipeline. This helps to understand the diversity of the\nresult, especially when interactively exploring data. Usually, the first N\nevents are returned, but this is not guaranteed.

\n

<limit>

\n

An unsigned integer denoting how many events to keep per schema.

\n

Defaults to 10.

\n

Examples

\n

Get 10 results of each unique schema:

\n
taste\n
\n

Get one sample for every unique event type:

\n
taste 1\n
', - docLink: "https://docs.tenzir.com/operators/transformations/taste", + "label": "timeshift", + "type": "keyword", + "detail": "Adjusts timestamps relative to a given start time, with an optional speedup.", + "processedHTML": "

Synopsis

\n
timeshift [--start <time>] [--speed <factor>] <field>\n
\n

Description

\n

The timeshift operator adjusts a series of time values by anchoring them\naround a given start time.

\n

With --speed, you can adjust the relative speed of the time series induced by\nfield with a multiplicative factor. This has the effect of making the time\nseries \"faster\" for values greater than 1 and \"slower\" for values less than 1.

\n

If you do not provide a start time with --start, the operator will anchor the\ntimestamps at the first non-null timestamp.

\n

\"Timeshift\"

\n

The options --start and --speed work independently, i.e., you can use them\nseparately or both together.

\n

--start <time>

\n

The timestamp to anchor the time values around.

\n

Defaults to the first non-null timestamp in field.

\n

--speed <speed>

\n

A constant factor by which to divide the inter-arrival time. For example, 2.0\ndecreases the event gaps by a factor of two, resulting in a dataflow that is\ntwice as fast. A value of 0.1 creates a dataflow that spans ten times the original time frame.

\n

Defaults to 1.0.

\n

<field>

\n

The name of the field containing the timestamp values.

\n

Examples

\n

Set the M57 Zeek logs to begin at Jan 1, 1984:

\n
from https://storage.googleapis.com/tenzir-datasets/M57/zeek-all.log.zst read zeek-tsv\n| timeshift --start 1984-01-01 ts\n
\n

As above, but also make the time span of the trace 100 times longer:

\n
from https://storage.googleapis.com/tenzir-datasets/M57/zeek-all.log.zst read zeek-tsv\n| timeshift --start 1984-01-01 --speed 0.01 ts\n
", + "docLink": "https://docs.tenzir.com/operators/timeshift" }, { - label: "top", - type: "keyword", - detail: "Shows the most common values. The dual to rare.", - processedHTML: - "

Synopsis

\n
top <field> [--count-field=<count-field>|-c <count-field>]\n
\n

Description

\n

Shows the most common values for a given field. For each unique value, a new event containing its count will be produced.

\n

<field>

\n

The name of the field to find the most common values for.

\n

--count-field=<count-field>|-c <count-field>

\n

An optional argument specifying the field name of the count field. Defaults to count.

\n

The count field and the value field must have different names.

\n

Examples

\n

Find the most common values for field id.orig_h.

\n
top id.orig_h\n
\n

Find the most common values for field count and present the value amount in a field amount.

\n
top count --count-field=amount\n
", - docLink: "https://docs.tenzir.com/operators/transformations/top", + "label": "to", + "type": "keyword", + "detail": "Consumes events by combining a \\[connector]\\[connectors] and a \\[format]\\[formats].", + "processedHTML": "

Synopsis

\n
to <uri> [write <format>]\nto <path> [write <format>]\nto <connector> [write <format>]\n
\n

Description

\n

The to operator consumes events at the end of a pipeline by bringing together\na connector and a format.

\n

If given something that looks like a path to a file, the connector can pick\nout a format automatically based on the file extension or the file name.\nThis enables a shorter syntax, e.g., to ./file.csv uses the csv format.\nAll connectors also have a default format, which will be used\nif the format can't be determined by the path. For most connectors,\nthis default format is json.\nSo, for example, to stdin uses the json format.

\n

Additionally, if a file extension indicating compression can be found,\ncompress is automatically used. For example, to myfile.json.gz is automatically gzip-compressed and formatted as json, i.e.,\nwrite json | compress gzip | save myfile.json.gz.

\n

The to operator is a pipeline under the hood. For most cases, it is equal to\nwrite <format> | save <connector>. However, for some combinations of\nconnectors and formats the underlying pipeline is a bit more complex. We\nrecommend always using to ... write ... over the write and\nsave operators.

\n

<connector>

\n

The connector used to save bytes.

\n

Some connectors have connector-specific options. Please refer to the\ndocumentation of the individual connectors for more information.

\n

<format>

\n

The format used to print events to bytes.

\n

Some formats have format-specific options. Please refer to the documentation of\nthe individual formats for more information.

\n

Examples

\n

Write events to stdout formatted as CSV.

\n
to stdout write csv\n
\n

Write events to the file path/to/eve.json formatted as JSON.

\n
to path/to/eve.json write json\nto file path/to/eve.json write json\n
", + "docLink": "https://docs.tenzir.com/operators/to" }, { - label: "unflatten", - type: "keyword", - detail: - "Unflattens data structures whose field names imply a nested structure.", - processedHTML: - '

Synopsis

\n
unflatten [<separator>]\n
\n

Description

\n

The unflatten operator creates nested records out of record entries whose\nnames include a separator, thus unflattening

\n

unflatten uses a heuristic to determine the unflattened schema. Thus, the\nschema of a record that has been flattened using the flatten operator and\nunflattened afterwards may not be identical to the schema of the unmodified\nrecord.

\n

<separator>

\n

The separator string to unflatten records with.

\n

Defaults to ..

\n

Examples

\n

Consider the following data:

\n
{\n  "src_ip": "147.32.84.165",\n  "src_port": 1141,\n  "dest_ip": "147.32.80.9",\n  "dest_port": 53,\n  "event_type": "dns",\n  "dns.type": "query",\n  "dns.id": 553,\n  "dns.rrname": "irc.freenode.net",\n  "dns.rrtype": "A",\n  "dns.tx_id": 0,\n  "dns.grouped.A": ["tenzir.com"]\n}\n
\n

The unflatten operator recreates nested records from fields that contain the .\nseparator:

\n
{\n  "src_ip": "147.32.84.165",\n  "src_port": 1141,\n  "dest_ip": "147.32.80.9",\n  "dest_port": 53,\n  "event_type": "dns",\n  "dns": {\n    "type": "query",\n    "id": 553,\n    "rrname": "irc.freenode.net",\n    "rrtype": "A",\n    "tx_id": 0,\n    "grouped": {\n      "A": [\n        "tenzir.com"\n      ]\n    }\n  }\n}\n
', - docLink: "https://docs.tenzir.com/operators/transformations/unflatten", + "label": "top", + "type": "keyword", + "detail": "Shows the most common values. The dual to rare.", + "processedHTML": "

Synopsis

\n
top <field> [--count-field=<count-field>|-c <count-field>]\n
\n

Description

\n

Shows the most common values for a given field. For each unique value, a new event containing its count will be produced.

\n

<field>

\n

The name of the field to find the most common values for.

\n

--count-field=<count-field>|-c <count-field>

\n

An optional argument specifying the field name of the count field. Defaults to count.

\n

The count field and the value field must have different names.

\n

Examples

\n

Find the most common values for field id.orig_h.

\n
top id.orig_h\n
\n

Find the most common values for field count and present the value amount in a field amount.

\n
top count --count-field=amount\n
", + "docLink": "https://docs.tenzir.com/operators/top" }, - { - label: "unique", - type: "keyword", - detail: "Removes adjacent duplicates.", - processedHTML: - '

Synopsis

\n
unique\n
\n

Description

\n

The unique operator deduplicates adjacent values, similar to the Unix tool\nuniq.

\n

A frequent use case is selecting a set of fields, sorting the\ninput, and then removing duplicates from the input.

\n

Examples

\n

Consider the following data:

\n
{"foo": 1, "bar": "a"}\n{"foo": 1, "bar": "a"}\n{"foo": 1, "bar": "a"}\n{"foo": 1, "bar": "b"}\n{"foo": null, "bar": "b"}\n{"bar": "b"}\n{"foo": null, "bar": "b"}\n{"foo": null, "bar": "b"}\n
\n

The unique operator removes adjacent duplicates and produces the following output:

\n
{"foo": 1, "bar": "a"}\n{"foo": 1, "bar": "b"}\n{"foo": null, "bar": "b"}\n{"bar": "b"}\n{"foo": null, "bar": "b"}\n
\n

Note that the output still contains the event {"foo": null, "bar": "b"} twice.\nThis is because unique only removes adjacent duplicates.

\n

To remove all duplicates (including non-adjacent ones), sort\nthe input first such that duplicate values lay adjacent to each other. Unlike\ndeduplication via unique, sorting is a blocking and operation and consumes\nthe entire input before producing outputs.

', - docLink: "https://docs.tenzir.com/operators/transformations/unique", + { + "label": "unflatten", + "type": "keyword", + "detail": "Unflattens data structures whose field names imply a nested structure.", + "processedHTML": "

Synopsis

\n
unflatten [<separator>]\n
\n

Description

\n

The unflatten operator creates nested records out of record entries whose\nnames include a separator, thus unflattening the record.

\n

unflatten uses a heuristic to determine the unflattened schema. Thus, the\nschema of a record that has been flattened using the flatten operator and\nunflattened afterwards may not be identical to the schema of the unmodified\nrecord.

\n

<separator>

\n

The separator string to unflatten records with.

\n

Defaults to ..

\n

Examples

\n

Consider the following data:

\n
{\n  \"src_ip\": \"147.32.84.165\",\n  \"src_port\": 1141,\n  \"dest_ip\": \"147.32.80.9\",\n  \"dest_port\": 53,\n  \"event_type\": \"dns\",\n  \"dns.type\": \"query\",\n  \"dns.id\": 553,\n  \"dns.rrname\": \"irc.freenode.net\",\n  \"dns.rrtype\": \"A\",\n  \"dns.tx_id\": 0,\n  \"dns.grouped.A\": [\"tenzir.com\"]\n}\n
\n

The unflatten operator recreates nested records from fields that contain the .\nseparator:

\n
{\n  \"src_ip\": \"147.32.84.165\",\n  \"src_port\": 1141,\n  \"dest_ip\": \"147.32.80.9\",\n  \"dest_port\": 53,\n  \"event_type\": \"dns\",\n  \"dns\": {\n    \"type\": \"query\",\n    \"id\": 553,\n    \"rrname\": \"irc.freenode.net\",\n    \"rrtype\": \"A\",\n    \"tx_id\": 0,\n    \"grouped\": {\n      \"A\": [\n        \"tenzir.com\"\n      ]\n    }\n  }\n}\n
", + "docLink": "https://docs.tenzir.com/operators/unflatten" }, - { - label: "where", - type: "keyword", - detail: "Filters events according to an expression.", - processedHTML: - '

Synopsis

\n
where <expression>\n
\n

Description

\n

The where operator only keeps events that match the provided\nexpression and discards all other events.

\n

Use where to extract the subset of interest of the data. Tenzir\'s expression\nlanguage offers various ways to describe the desired data. In particular,\nexpressions work across schemas and thus make it easy to concisely articulate\nconstraints.

\n

<expression>

\n

The expression to evaluate for each event.

\n

Examples

\n

Select all events that contain a field with the value 1.2.3.4:

\n
where 1.2.3.4\n
\n

This expression internally completes to :ip == 1.2.3.4. The type extractor\n:ip describes all fields of type ip. Use field extractors to only consider a\nsingle field:

\n
where src_ip == 1.2.3.4\n
\n

As a slight variation of the above: use a nested field name and a temporal\nconstraint of the field with name ts:

\n
where id.orig_h == 1.2.3.4 && ts > 1 hour ago\n
\n

Subnets are first-class values:

\n
where 10.10.5.0/25\n
\n

This expression unfolds to :ip in 10.10.5.0/25 || :subnet == 10.10.5.0/25. It\nmeans "select all events that contain a field of type ip in the subnet\n10.10.5.0/25, or a field of type subnet the exactly matches 10.10.5.0/25".

\n

Expressions consist of predicates that can be connected with AND, OR, and NOT:

\n
where 10.10.5.0/25 && (orig_bytes > 1 Mi || duration > 30 min)\n
', - docLink: "https://docs.tenzir.com/operators/transformations/where", + { + "label": "unique", + "type": "keyword", + "detail": "Removes adjacent duplicates.", + "processedHTML": "

Synopsis

\n
unique\n
\n

Description

\n

The unique operator deduplicates adjacent values, similar to the Unix tool\nuniq.

\n

A frequent use case is selecting a set of fields, sorting the\ninput, and then removing duplicates from the input.

\n

Examples

\n

Consider the following data:

\n
{\"foo\": 1, \"bar\": \"a\"}\n{\"foo\": 1, \"bar\": \"a\"}\n{\"foo\": 1, \"bar\": \"a\"}\n{\"foo\": 1, \"bar\": \"b\"}\n{\"foo\": null, \"bar\": \"b\"}\n{\"bar\": \"b\"}\n{\"foo\": null, \"bar\": \"b\"}\n{\"foo\": null, \"bar\": \"b\"}\n
\n

The unique operator removes adjacent duplicates and produces the following output:

\n
{\"foo\": 1, \"bar\": \"a\"}\n{\"foo\": 1, \"bar\": \"b\"}\n{\"foo\": null, \"bar\": \"b\"}\n{\"bar\": \"b\"}\n{\"foo\": null, \"bar\": \"b\"}\n
\n

Note that the output still contains the event {\"foo\": null, \"bar\": \"b\"} twice.\nThis is because unique only removes adjacent duplicates.

\n

To remove all duplicates (including non-adjacent ones), sort\nthe input first such that duplicate values lie adjacent to each other. Unlike\ndeduplication via unique, sorting is a blocking operation and consumes\nthe entire input before producing outputs.

", + "docLink": "https://docs.tenzir.com/operators/unique" }, - { - label: "write", - type: "keyword", - detail: "The write operator converts events into raw bytes.", - processedHTML: - '

Synopsis

\n
write <format>\n
\n

Description

\n

The write operator prints events and outputs the formatted result as raw\nbytes.

\n

<format>

\n

The format used to convert events into raw bytes.

\n

Some formats have format-specific options. Please refer to the documentation of\nthe individual formats for more information.

\n

Examples

\n

Convert events into JSON:

\n
write json\n
\n

Convert events into CSV:

\n
write csv\n
', - docLink: "https://docs.tenzir.com/operators/transformations/write", + { + "label": "unroll", + "type": "keyword", + "detail": "Unrolls a list by producing multiple events, one for each item.", + "processedHTML": "

Synopsis

\n
unroll <field>\n
\n

Description

\n

The unroll operator transforms each input event into multiple output events.\nFor each item in the input list, one output event is created, where the list is\nreplaced with its item. The surrounding data is kept as-is.

\n

\"Unroll

\n

No output events are produced if the list is empty or if the field is null.

\n

Examples

\n

Consider the following events:

\n
{\"a\": 1, \"b\": [1, 2, 3]}\n{\"a\": 2, \"b\": [1]}\n{\"a\": 3, \"b\": []}\n{\"a\": 4, \"b\": null}\n
\n

unroll b would produce the following output:

\n
{\"a\": 1, \"b\": 1}\n{\"a\": 1, \"b\": 2}\n{\"a\": 1, \"b\": 3}\n{\"a\": 2, \"b\": 1}\n
\n

The unroll operator can also be used with records.

\n
{\n  \"src\": \"192.168.0.5\",\n  \"conn\": [\n    {\n      \"dest\": \"192.168.0.34\",\n      \"active\": \"381ms\"\n    },\n    {\n      \"dest\": \"192.168.0.120\",\n      \"active\": \"42ms\"\n    },\n    {\n      \"dest\": \"1.2.3.4\",\n      \"active\": \"67ms\"\n    }\n  ]\n}\n
\n

We can use unroll conn to bring this into a form more suited for analysis.\nFor example, we would then be able to use\nwhere active > 100ms || conn.dest !in 192.168.0.0/16 to filter for relevant\nconnections.

\n
{\n  \"src\": \"192.168.0.5\",\n  \"conn\": {\n    \"dest\": \"192.168.0.34\",\n    \"active\": \"381.0ms\"\n  }\n}\n{\n  \"src\": \"192.168.0.5\",\n  \"conn\": {\n    \"dest\": \"1.2.3.4\",\n    \"active\": \"67.0ms\"\n  }\n}\n
", + "docLink": "https://docs.tenzir.com/operators/unroll" }, -]; + { + "label": "velociraptor", + "type": "keyword", + "detail": "Submits VQL to a Velociraptor server and returns the response as events.", + "processedHTML": "

Synopsis

\n
velociraptor [-n|--request-name <string>] [-o|--org-id <string>]\n             [-r|--max-rows <uint64>] [-s|--subscribe <artifact>]\n             [-q|--query <vql>] [-w|--max-wait <duration>]\n             [--profile <profile>]\n
\n

Description

\n

The velociraptor source operator provides a request-response interface to a\nVelociraptor server:

\n

\"Velociraptor\"

\n

The pipeline operator is the client and it establishes a connection to a\nVelociraptor server. The client request contains a query written in the\nVelociraptor Query Language (VQL), a SQL-inspired language with a SELECT .. FROM .. WHERE structure.

\n

You can either send a raw VQL query via velociraptor --query \"<vql>\" to a\nserver and process the response, or hook into a continuous feed of artifacts\nvia velociraptor --subscribe <artifact>. Whenever a hunt runs that contains\nthis artifact, the server will forward it to the pipeline and emit the artifact\npayload in the response field HuntResults.

\n

All Velociraptor client-to-server communication is mutually authenticated and\nencrypted via TLS certificates. This means you must provide a client-side\ncertificate, which you can generate as follows. (Velociraptor ships as a static\nbinary that we refer to as velociraptor-binary here.)

\n
    \n
  1. \n

    Create a server configuration server.yaml:

    \n
    velociraptor-binary config generate > server.yaml\n
    \n
  2. \n
  3. \n

    Create an API client:

    \n
    velociraptor-binary -c server.yaml config api_client --name tenzir client.yaml\n
    \n

    Copy the generated client.yaml to your Tenzir plugin configuration\ndirectory as velociraptor.yaml so that the operator can find it:

    \n
    cp client.yaml /etc/tenzir/plugin/velociraptor.yaml\n
    \n
  4. \n
  5. \n

    Run the frontend with the server configuration:

    \n
    velociraptor-binary -c server.yaml frontend\n
    \n
  6. \n
\n

Now you are ready to run VQL queries!

\n

-n|--request-name <string>

\n

An identifier for the request to the Velociraptor server.

\n

Defaults to a random UUID.

\n

-o|--org-id <string>

\n

The ID of the Velociraptor organization.

\n

Defaults to root.

\n

-q|--query <vql>

\n

The VQL query string.

\n

-r|--max-rows <uint64>

\n

The maximum number of rows to return in the stream of gRPC messages returned by\nthe server.

\n

Defaults to 1,000.

\n

-s|--subscribe <artifact>

\n

Subscribes to a flow artifact.

\n

This option generates a larger VQL expression under the hood that creates one\nevent per flow and artifact. The response contains a field HuntResult that\ncontains the result of the hunt.

\n

-w|--max-wait <duration>

\n

Controls how long to wait before releasing a partial result set.

\n

Defaults to 1 sec.

\n

--profile <profile>

\n

Specifies the configuration profile for the Velociraptor instance. This enables\nconnecting to multiple Velociraptor instances from the same Tenzir node.

\n

To use profiles, edit your velociraptor.yaml configuration like this, where\n<config> refers to the contents of the configuration file created by Velociraptor, and\n<profile> to the desired profile name.

\n
# before\n<config>\n\n# after\nprofiles:\n  <profile>:\n    <config>\n
\n

If profiles are defined, the operator defaults to the first profile.

\n

Examples

\n

Show all processes:

\n
velociraptor --query \"select * from pslist()\"\n
\n

Subscribe to a hunt flow that contains the Windows artifact:

\n
velociraptor --subscribe Windows\n
", + "docLink": "https://docs.tenzir.com/operators/velociraptor" + }, + { + "label": "version", + "type": "keyword", + "detail": "Shows the current version.", + "processedHTML": "

Synopsis

\n
version\n
\n

Description

\n

The version operator shows the current Tenzir version.

\n

Schemas

\n

Tenzir emits version information with the following schema.

\n

tenzir.version

\n

Contains detailed information about the process version.

\n

|Field|Type|Description|\n|:-|:-|:-|\n|version|string|The formatted version string.|\n|major|uint64|The major release version.|\n|minor|uint64|The minor release version.|\n|patch|uint64|The patch release version.|\n|tweak|uint64|The number of changes since the last release.|

\n

Examples

\n

Use version to show the current version of a development build:

\n
{\n  \"version\": \"v4.6.3-36-gbd4c8a058b-dirty\",\n  \"major\": 4,\n  \"minor\": 6,\n  \"patch\": 3,\n  \"tweak\": 36\n}\n
\n

Use version to show the current version of a release build:

\n
{\n  \"version\": \"v4.7.0\",\n  \"major\": 4,\n  \"minor\": 7,\n  \"patch\": 0,\n  \"tweak\": 0\n}\n
", + "docLink": "https://docs.tenzir.com/operators/version" + }, + { + "label": "where", + "type": "keyword", + "detail": "Filters events according to an expression.", + "processedHTML": "

Synopsis

\n
where <expression>\n
\n

Description

\n

The where operator only keeps events that match the provided\nexpression and discards all other events.

\n

Use where to extract the subset of interest from the data. Tenzir's expression\nlanguage offers various ways to describe the desired data. In particular,\nexpressions work across schemas and thus make it easy to concisely articulate\nconstraints.

\n

<expression>

\n

The expression to evaluate for each event.

\n

Examples

\n

Select all events that contain a field with the value 1.2.3.4:

\n
where 1.2.3.4\n
\n

This expression internally completes to :ip == 1.2.3.4. The type extractor\n:ip describes all fields of type ip. Use field extractors to only consider a\nsingle field:

\n
where src_ip == 1.2.3.4\n
\n

As a slight variation of the above: use a nested field name and a temporal\nconstraint of the field with name ts:

\n
where id.orig_h == 1.2.3.4 and ts > 1 hour ago\n
\n

Subnets are first-class values:

\n
where 10.10.5.0/25\n
\n

This expression unfolds to :ip in 10.10.5.0/25 or :subnet == 10.10.5.0/25. It\nmeans \"select all events that contain a field of type ip in the subnet\n10.10.5.0/25, or a field of type subnet that exactly matches 10.10.5.0/25\".

\n

Expressions consist of predicates that can be connected with and, or, and\nnot:

\n
where 10.10.5.0/25 and (orig_bytes > 1 Mi or duration > 30 min)\n
", + "docLink": "https://docs.tenzir.com/operators/where" + }, + { + "label": "write", + "type": "keyword", + "detail": "The write operator converts events into raw bytes.", + "processedHTML": "

Synopsis

\n
write <format>\n
\n

Description

\n

The write operator prints events and outputs the formatted result as raw\nbytes.

\n

<format>

\n

The format used to convert events into raw bytes.

\n

Some formats have format-specific options. Please refer to the documentation of\nthe individual formats for more information.

\n

Examples

\n

Convert events into JSON:

\n
write json\n
\n

Convert events into CSV:

\n
write csv\n
", + "docLink": "https://docs.tenzir.com/operators/write" + }, + { + "label": "yara", + "type": "keyword", + "detail": "Executes YARA rules on byte streams.", + "processedHTML": "

Synopsis

\n
yara [-B|--blockwise] [-C|--compiled-rules] [-f|--fast-scan] <rule> [<rule>..]\n
\n

Description

\n

The yara operator applies YARA rules to\nan input of bytes, emitting rule context upon a match.

\n

\"YARA

\n

We modeled the operator after the official yara command-line\nutility to enable a\nfamiliar experience for the command users. Similar to the official yara\ncommand, the operator compiles the rules by default, unless you provide the\noption -C,--compiled-rules. To quote from the above link:

\n
\n

This is a security measure to prevent users from inadvertently using compiled\nrules coming from a third-party. Using compiled rules from untrusted sources\ncan lead to the execution of malicious code in your computer.

\n
\n

The operator uses a YARA scanner under the hood that buffers blocks of bytes\nincrementally. Even though the input arrives in non-contiguous blocks of\nmemory, the YARA scanner engine supports matching across block boundaries. For\ncontinuously running pipelines, use the --blockwise option, which considers each\nblock as a separate unit. Otherwise the scanner engine would simply accumulate\nblocks but never trigger a scan.

\n

-B|--blockwise

\n

Match on every byte chunk instead of triggering a scan when the input is exhausted.

\n

This option makes sense for never-ending dataflows where each chunk of bytes\nconstitutes a self-contained unit, such as a single file.

\n

-C|--compiled-rules

\n

Interpret the rules as compiled.

\n

When providing this flag, you must provide exactly one rule path as a positional\nargument.

\n

-f|--fast-scan

\n

Enable fast matching mode.

\n

<rule>

\n

The path to the YARA rule(s).

\n

If the path is a directory, the operator attempts to recursively add all\ncontained files as YARA rules.

\n

Examples

\n

The examples below show how you can scan a single file and how you can create a\nsimple rule scanning service.

\n

Perform one-shot scanning of files

\n

Scan a file with a set of YARA rules:

\n
load file --mmap evil.exe | yara rule.yara\n
\n
Memory Mapping Optimization

The --mmap flag is merely an optimization that constructs a single chunk of\nbytes instead of a contiguous stream. Without --mmap, the\nfile loader generates a stream of byte chunks and\nfeeds them incrementally to the yara operator. This also works, but\nperformance is better due to memory locality when using --mmap.

\n

Let's unpack a concrete example:

\n
rule test {\n  meta:\n    string = \"string meta data\"\n    integer = 42\n    boolean = true\n\n  strings:\n    $foo = \"foo\"\n    $bar = \"bar\"\n    $baz = \"baz\"\n\n  condition:\n    ($foo and $bar) or $baz\n}\n
\n

You can produce test matches by feeding bytes into the yara operator:

\n
echo 'foo bar' | tenzir 'load stdin | yara /tmp/test.yara'\n
\n

You will get one yara.match per matching rule:

\n
{\n  \"rule\": {\n    \"identifier\": \"test\",\n    \"namespace\": \"default\",\n    \"tags\": [],\n    \"meta\": {\n      \"string\": \"string meta data\",\n      \"integer\": 42,\n      \"boolean\": true\n    },\n    \"strings\": {\n      \"$foo\": \"foo\",\n      \"$bar\": \"bar\",\n      \"$baz\": \"baz\"\n    }\n  },\n  \"matches\": {\n    \"$foo\": [\n      {\n        \"data\": \"Zm9v\",\n        \"base\": 0,\n        \"offset\": 0,\n        \"match_length\": 3\n      }\n    ],\n    \"$bar\": [\n      {\n        \"data\": \"YmFy\",\n        \"base\": 0,\n        \"offset\": 4,\n        \"match_length\": 3\n      }\n    ]\n  }\n}\n
\n

Each match has a rule field describing the rule and a matches record\nindexed by string identifier to report a list of matches per rule string.

\n

Build a YARA scanning service

\n

Let's say you want to build a service that scans malware samples that you receive\nover a Kafka topic malware.

\n

Launch the processing pipeline as follows:

\n
load kafka --topic malware | yara --blockwise /path/to/rules\n
\n

If you run this pipeline on the command line via tenzir <pipeline>, you see\nthe matches arriving as JSON. You could also send the matches via the\nfluent-bit sink to Slack, Splunk, or any other\nFluent Bit output. For example, via Slack:

\n
load kafka --topic malware\n| yara --blockwise /path/to/rules\n| fluent-bit slack webhook=<url>\n
\n

This pipeline requires that every Kafka message is a self-contained malware\nsample. Because the pipeline runs continuously, we supply the --blockwise\noption so that the yara operator triggers a scan for every Kafka message, as opposed to\naccumulating all messages indefinitely and only initiating a scan when the input\nis exhausted.

\n

You can now submit a malware sample by sending it to the malware Kafka topic:

\n
load file --mmap evil.exe | save kafka --topic malware\n
\n

This pipeline loads the file evil.exe as a single blob and sends it to Kafka at\nthe topic malware.

", + "docLink": "https://docs.tenzir.com/operators/yara" + }, + { + "label": "yield", + "type": "keyword", + "detail": "Extracts nested records with the ability to unfold lists.", + "processedHTML": "

Synopsis

\n
yield <extractor>\n
\n

Description

\n

The yield operator can be used to \"zoom into\" the extracted part of the\nincoming events. It can also return a new event for each element of a list.

\n

<extractor>

\n

The extractor must start with a field name. This can be followed by . and\nanother field name, or by [] to extract all elements from the given list.

\n

Examples

\n

The schema suricata.dns provides a list of answers for DNS queries. Assume we\nwant to extract all answers for CNAME records.

\n
from eve.json\n| where #schema == \"suricata.dns\"\n| yield dns.answers[]\n| where rrtype == \"CNAME\"\n
", + "docLink": "https://docs.tenzir.com/operators/yield" + } +]; \ No newline at end of file diff --git a/scripts/index.js b/scripts/index.js index 1950b9e..7041e78 100644 --- a/scripts/index.js +++ b/scripts/index.js @@ -31,12 +31,28 @@ const processFile = async (filePath) => { let detail = undefined; let infoArr = []; - for (let i = 1; i < lines.length; i++) { + let i = 0; + while (i < lines.length) { + if (lines[i].startsWith("---")) { + i++; + while (!lines[i].startsWith("---")) { + i++; + } + i++; + } else if (lines[i].trim() === "" || lines[i].startsWith("#")) { + i++; + } else { + break; + } + } + while (i < lines.length) { + console.log(lines[i]) if (detail == undefined && lines[i].trim() !== "") { detail = lines[i]; } else if (detail != null) { infoArr.push(lines[i]); } + i++; } let info = infoArr.join("\n"); @@ -74,9 +90,7 @@ const processDirectory = async (directoryPath, outputPath) => { // ignore files that are not inside sources, transformations, or sinks subdirectories // TODO: this is a hacky way to do this, but it works for now if ( - !file.includes("sources") && - !file.includes("transformations") && - !file.includes("sinks") + !file.includes("/operators/") ) { continue; } @@ -92,7 +106,7 @@ const processDirectory = async (directoryPath, outputPath) => { await writeFile( outputPath, - `export const data = ${JSON.stringify(output)};`, + `export const data = ${JSON.stringify(output, undefined, 2)};`, "utf8", ); } catch (err) { diff --git a/yarn.lock b/yarn.lock index 477cbec..cb8af43 100644 --- a/yarn.lock +++ b/yarn.lock @@ -64,17 +64,22 @@ resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz#d7c6e6755c78567a951e04ab52ef0fd26de59f32" integrity sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg== -"@lezer/common@^1.0.0", "@lezer/common@^1.0.2": +"@lezer/common@^1.0.0": version "1.0.2" resolved "https://registry.yarnpkg.com/@lezer/common/-/common-1.0.2.tgz#8fb9b86bdaa2ece57e7d59e5ffbcb37d71815087" integrity sha512-SVgiGtMnMnW3ActR8SXgsDhw7a0w0ChHSYAyAUxxrOiJ1OqYWEKk/xJd84tTSPo1mo6DXLObAJALNnd0Hrv7Ng== -"@lezer/generator@^1.0.0": - version "1.2.3" - resolved "https://registry.yarnpkg.com/@lezer/generator/-/generator-1.2.3.tgz#db6504588a214791a7f19e26e177c060ba21d5f0" - integrity sha512-xRmNryYbJpWs7novjWtQLCGHOj71B4X1QHQ4SgJqwm11tl6COEVAGhuFTXKX16JMJUhumdXaX8We6hEMd4clDg== +"@lezer/common@^1.1.0": + version "1.2.1" + resolved "https://registry.yarnpkg.com/@lezer/common/-/common-1.2.1.tgz#198b278b7869668e1bebbe687586e12a42731049" + integrity sha512-yemX0ZD2xS/73llMZIK6KplkjIjf2EvAHcinDi/TfJ9hS25G0388+ClHt6/3but0oOxinTcQHJLDXh6w1crzFQ== + +"@lezer/generator@^1.3.0": + version "1.7.0" + resolved "https://registry.yarnpkg.com/@lezer/generator/-/generator-1.7.0.tgz#7163c013125164fee2728303fd577dceb2acc46f" + integrity sha512-IJ16tx3biLKlCXUzcK4v8S10AVa2BSM2rB12rtAL6f1hL2TS/HQQlGCoWRvanlL2J4mCYEEIv9uG7n4kVMkVDA== dependencies: - "@lezer/common" "^1.0.2" + "@lezer/common" "^1.1.0" "@lezer/lr" "^1.3.0" "@lezer/highlight@^1.0.0": @@ -84,6 +89,13 @@ dependencies: "@lezer/common" "^1.0.0" +"@lezer/highlight@^1.1.6": + version "1.2.0" + resolved "https://registry.yarnpkg.com/@lezer/highlight/-/highlight-1.2.0.tgz#e5898c3644208b4b589084089dceeea2966f7780" + integrity sha512-WrS5Mw51sGrpqjlh3d4/fOwpEV2Hd3YOkp9DBt4k8XZQcoTHZFB7sx030A6OcahF4J1nDQAa3jXlTVVYH50IFA== + dependencies: + "@lezer/common" "^1.0.0" + "@lezer/lr@^1.0.0", "@lezer/lr@^1.3.0": version "1.3.4" resolved 
"https://registry.yarnpkg.com/@lezer/lr/-/lr-1.3.4.tgz#8795bf2ba4f69b998e8fb4b5a7c57ea68753474c" @@ -110,6 +122,13 @@ resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.1.tgz#aa22750962f3bf0e79d753d3cc067f010c95f194" integrity sha512-LG4opVs2ANWZ1TJoKc937iMmNstM/d0ae1vNbnBvBhqCSezgVUOzcLCqbI5elV8Vy6WKwKjaqR+zO9VKirBBCA== +"@types/hast@^3.0.0": + version "3.0.4" + resolved "https://registry.yarnpkg.com/@types/hast/-/hast-3.0.4.tgz#1d6b39993b82cea6ad783945b0508c25903e15aa" + integrity sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ== + dependencies: + "@types/unist" "*" + "@types/node@^17.0.36": version "17.0.45" resolved "https://registry.yarnpkg.com/@types/node/-/node-17.0.45.tgz#2c0fafd78705e7a18b7906b5201a522719dc5190" @@ -130,6 +149,11 @@ resolved "https://registry.yarnpkg.com/@types/ua-parser-js/-/ua-parser-js-0.7.36.tgz#9bd0b47f26b5a3151be21ba4ce9f5fa457c5f190" integrity sha512-N1rW+njavs70y2cApeIw1vLMYXRwfBy+7trgavGuuTfOd7j1Yh7QTRc/yqsPl6ncokt72ZXuxEU0PiCp9bSwNQ== +"@types/unist@*", "@types/unist@^3.0.0": + version "3.0.2" + resolved "https://registry.yarnpkg.com/@types/unist/-/unist-3.0.2.tgz#6dd61e43ef60b34086287f83683a5c1b2dc53d20" + integrity sha512-dqId9J8K/vGi5Zr7oo212BGii5m3q5Hxlkwy3WpYuKPklmBEvsbMYYyLxAQpSffdLl/gdW0XUpKWFvYmyoWCoQ== + "@ungap/promise-all-settled@1.1.2": version "1.1.2" resolved "https://registry.yarnpkg.com/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz#aa58042711d6e3275dd37dc597e5d31e8c290a44" @@ -313,6 +337,11 @@ color-name@~1.1.4: resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== +comma-separated-tokens@^2.0.0: + version "2.0.3" + resolved "https://registry.yarnpkg.com/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz#4e89c9458acb61bc8fef19f4529973b2392839ee" + integrity sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg== + compatfactory@^2.0.9: version "2.0.9" resolved "https://registry.yarnpkg.com/compatfactory/-/compatfactory-2.0.9.tgz#98ccc78c7cac723ce05db0b7d9dae41b61407d7a" @@ -448,6 +477,24 @@ has-flag@^4.0.0: resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== +hast-util-parse-selector@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz#352879fa86e25616036037dd8931fb5f34cb4a27" + integrity sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A== + dependencies: + "@types/hast" "^3.0.0" + +hastscript@^8.0.0: + version "8.0.0" + resolved "https://registry.yarnpkg.com/hastscript/-/hastscript-8.0.0.tgz#4ef795ec8dee867101b9f23cc830d4baf4fd781a" + integrity sha512-dMOtzCEd3ABUeSIISmrETiKuyydk1w0pa+gE/uormcTpSYuaNJPbX1NU3JLyscSLjwAQM8bWMhhIlnCqnRvDTw== + dependencies: + "@types/hast" "^3.0.0" + comma-separated-tokens "^2.0.0" + hast-util-parse-selector "^4.0.0" + property-information "^6.0.0" + space-separated-tokens "^2.0.0" + he@1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f" @@ -683,6 +730,11 @@ picomatch@^2.0.4, picomatch@^2.2.1, picomatch@^2.3.1: resolved 
"https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" integrity sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA== +property-information@^6.0.0: + version "6.5.0" + resolved "https://registry.yarnpkg.com/property-information/-/property-information-6.5.0.tgz#6212fbb52ba757e92ef4fb9d657563b933b7ffec" + integrity sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig== + randombytes@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a" @@ -758,6 +810,11 @@ sourcemap-codec@^1.4.8: resolved "https://registry.yarnpkg.com/sourcemap-codec/-/sourcemap-codec-1.4.8.tgz#ea804bd94857402e6992d05a38ef1ae35a9ab4c4" integrity sha512-9NykojV5Uih4lgo5So5dtw+f0JgJX30KCNI8gwhz2J9A15wD0Ml6tjHKwf6fTSa6fAdVBdZeNOs9eJ71qCk8vA== +space-separated-tokens@^2.0.0: + version "2.0.2" + resolved "https://registry.yarnpkg.com/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz#1ecd9d2350a3844572c3f4a312bceb018348859f" + integrity sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q== + string-width@^4.1.0, string-width@^4.2.0: version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" @@ -834,6 +891,30 @@ ua-parser-js@^1.0.33: resolved "https://registry.yarnpkg.com/ua-parser-js/-/ua-parser-js-1.0.35.tgz#c4ef44343bc3db0a3cbefdf21822f1b1fc1ab011" integrity sha512-fKnGuqmTBnIE+/KXSzCn4db8RTigUzw1AN0DmdU6hJovUTbYJKyqj+8Mt1c4VfRDnOVJnENmfYkIPZ946UrSAA== +unist-util-is@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/unist-util-is/-/unist-util-is-6.0.0.tgz#b775956486aff107a9ded971d996c173374be424" + integrity sha512-2qCTHimwdxLfz+YzdGfkqNlH0tLi9xjTnHddPmJwtIG9MGsdbutfTc4P+haPD7l7Cjxf/WZj+we5qfVPvvxfYw== + dependencies: + "@types/unist" "^3.0.0" + +unist-util-visit-parents@^6.0.0: + version "6.0.1" + resolved "https://registry.yarnpkg.com/unist-util-visit-parents/-/unist-util-visit-parents-6.0.1.tgz#4d5f85755c3b8f0dc69e21eca5d6d82d22162815" + integrity sha512-L/PqWzfTP9lzzEa6CKs0k2nARxTdZduw3zyh8d2NVBnsyvHjSX4TWse388YrrQKbvI8w20fGjGlhgT96WwKykw== + dependencies: + "@types/unist" "^3.0.0" + unist-util-is "^6.0.0" + +unist-util-visit@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/unist-util-visit/-/unist-util-visit-5.0.0.tgz#a7de1f31f72ffd3519ea71814cccf5fd6a9217d6" + integrity sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg== + dependencies: + "@types/unist" "^3.0.0" + unist-util-is "^6.0.0" + unist-util-visit-parents "^6.0.0" + update-browserslist-db@^1.0.10: version "1.0.11" resolved "https://registry.yarnpkg.com/update-browserslist-db/-/update-browserslist-db-1.0.11.tgz#9a2a641ad2907ae7b3616506f4b977851db5b940" From 387fff61dc20b8e800dc3d1f03b909cdd191b824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jannis=20Christopher=20K=C3=B6hl?= Date: Fri, 26 Apr 2024 14:53:31 +0200 Subject: [PATCH 2/2] Remove extra `console.log` --- scripts/index.js | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/index.js b/scripts/index.js index 7041e78..f54d3c9 100644 --- a/scripts/index.js +++ b/scripts/index.js @@ -46,7 +46,6 @@ const processFile = async (filePath) => { } } while (i < lines.length) { - console.log(lines[i]) if (detail == undefined && lines[i].trim() !== "") { detail = lines[i]; } else if (detail != null) {