Skip to content

Commit

Permalink
Merge pull request #407 from opencybersecurityalliance/develop
Browse files Browse the repository at this point in the history
v1.7.6
  • Loading branch information
subbyte authored Sep 25, 2023
2 parents 797aae5 + bca61b7 commit 3138950
Show file tree
Hide file tree
Showing 19 changed files with 364 additions and 63 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,21 @@ The format is based on `Keep a Changelog`_.
Unreleased
==========

1.7.6 (2023-09-25)
==================

Added
-----

- ``DESCRIBE`` command to get insights into attributes
- ``ikestrel`` interactive shell (command-line utility)
- Custom stix-shifter connector support #402

Fixed
-----

- Command-line utility tests failed without install

1.7.5 (2023-09-07)
==================

Expand Down
92 changes: 92 additions & 0 deletions bin/ikestrel
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env python3

# Executing a hunt in an interactive CLI
# Usage: `ikestrel [-v] [--debug]`

import argparse
import cmd
import logging

from tabulate import tabulate

from kestrel.session import Session
from kestrel.codegen.display import DisplayBlockSummary, DisplayDataframe
from kestrel.exceptions import KestrelException
from kestrel.utils import add_logging_handler, clear_logging_handlers


# Keywords offered by tab completion for the first word of a line.
# Mirrors the `command_no_result` rule in kestrel.lark (commands that can be
# used without assigning to a variable); keep the two in sync.
CMDS = [
    "APPLY",
    "DESCRIBE",
    "DISP",
    "INFO",
    "SAVE",
]


def display_outputs(outputs):
    """Print every display object in ``outputs`` to standard output.

    ``DisplayDataframe`` objects are rendered as a table with ``tabulate``,
    using the keys of their ``"data"`` records as column headers. Block
    summaries and any other display object are printed via ``to_string()``.
    """
    for display in outputs:
        if isinstance(display, DisplayBlockSummary):
            text = display.to_string()
        elif isinstance(display, DisplayDataframe):
            rows = display.to_dict()["data"]
            text = tabulate(rows, headers="keys")
        else:
            text = display.to_string()
        print(text)


class Cli(cmd.Cmd):
    """Interactive Kestrel shell built on :class:`cmd.Cmd`.

    Input that ``cmd.Cmd`` does not recognise as one of its own commands is
    executed as Kestrel code in the wrapped session, and tab completion is
    answered from the command list and the session.
    """

    prompt = "> "

    def __init__(self, session: Session):
        """Wrap ``session``, the Kestrel session used to run each line."""
        self.session = session
        self.buf = ""
        super().__init__()

    def default(self, line: str):
        """Execute ``line`` in the session and print what it displays.

        A ``KestrelException`` is printed instead of propagated so that a
        failing statement does not terminate the shell.
        """
        try:
            outputs = self.session.execute(line)
            display_outputs(outputs)
        except KestrelException as e:
            print(e)

    def emptyline(self):
        """Ignore an empty input line.

        ``cmd.Cmd`` repeats the last non-empty command by default; for a hunt
        that would silently re-run the previous statement (for example a
        query against a data source), so an empty line is a no-op here.
        """
        return False

    def completenames(self, text, *ignored):
        """Return completions for the first word of the input line.

        ``ignored`` carries the remaining ``cmd.Cmd`` completion arguments
        (full line, begin index, end index); matching is done against the
        full line rather than ``text``.
        """
        code, _start, _end = ignored
        if code.isupper():
            # Probably a command?
            results = [i for i in CMDS if i.startswith(code)]
        else:
            # Try all commands and vars
            results = [i for i in CMDS if i.lower().startswith(code)]
            results += [
                i for i in self.session.get_variable_names() if i.startswith(code)
            ]
        return results

    def completedefault(self, *ignored):
        """Return completions for anything after the first word.

        The session's ``do_complete`` is asked for candidates at the cursor
        position; each returned value is appended to the part of the line
        that starts at the begin index of the word being completed.
        """
        _, code, start, end = ignored
        results = self.session.do_complete(code, end)
        stub = code[start:]
        return [stub + suffix for suffix in results]

    def do_EOF(self, _line: str):
        """Leave the shell on end-of-file (Ctrl-D)."""
        # Finish the prompt line before returning control to the terminal.
        print()
        return True


if __name__ == "__main__":
    # Command-line options: -v enables log output, --debug is forwarded to
    # both the logging setup and the session.
    arg_parser = argparse.ArgumentParser(description="Kestrel Interpreter")
    arg_parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="print verbose log",
    )
    arg_parser.add_argument(
        "--debug",
        action="store_true",
        help="debug level log (default is info level)",
    )
    options = arg_parser.parse_args()

    # Drop any pre-existing handlers; attach a stream handler only on request.
    clear_logging_handlers()
    if options.verbose:
        console_handler = logging.StreamHandler()
        add_logging_handler(console_handler, options.debug)

    # The session is a context manager, so it is closed when the shell exits.
    with Session(debug_mode=options.debug) as session:
        shell = Cli(session)
        shell.cmdloop()
4 changes: 2 additions & 2 deletions bin/stix-shifter-diag
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import argparse
import datetime
from kestrel_datasource_stixshifter.diagnosis import Diagnosis
from kestrel_datasource_stixshifter.connector import check_module_availability
from kestrel_datasource_stixshifter.connector import setup_connector_module
from firepit.timestamp import timefmt


Expand Down Expand Up @@ -83,7 +83,7 @@ if __name__ == "__main__":
diag.diagnose_config()

# 2. setup connector and ping
check_module_availability(diag.connector_name, args.ignore_cert)
setup_connector_module(diag.connector_name, diag.allow_dev_connector, args.ignore_cert)

# 3. query translation test
diag.diagnose_translate_query(patterns[0])
Expand Down
39 changes: 38 additions & 1 deletion docs/language/commands.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ A Kestrel command describes a :ref:`language/tac:hunt step` in one of the five c
#. Retrieval: ``GET``, ``FIND``, ``NEW``.
#. Transformation: ``SORT``, ``GROUP``.
#. Enrichment: ``APPLY``.
#. Inspection: ``INFO``, ``DISP``.
#. Inspection: ``INFO``, ``DISP``, ``DESCRIBE``.
#. Flow-control: ``SAVE``, ``LOAD``, ``ASSIGN``, ``MERGE``, ``JOIN``.

To achieve :ref:`language/tac:composable hunt flow` and allow threat hunters to compose hunt
Expand Down Expand Up @@ -47,6 +47,8 @@ object, or both a variable and a display object.
+---------+----------------+---------------+----------------+---------------+
| DISP | yes | maybe | no | yes |
+---------+----------------+---------------+----------------+---------------+
| DESCRIBE | yes | no | no | yes |
+---------+----------------+---------------+----------------+---------------+
| SORT | yes | yes | yes | no |
+---------+----------------+---------------+----------------+---------------+
| GROUP | yes | yes | yes | no |
Expand Down Expand Up @@ -674,6 +676,41 @@ Examples
# display the timestamps from observations of those processes:
DISP TIMESTAMPED(procs) ATTR pid, name, command_line
DESCRIBE
--------

The command ``DESCRIBE`` is an *inspection* hunt step to show
descriptive statistics of a Kestrel variable attribute.

Syntax
^^^^^^
::

DESCRIBE varx.attr

The command shows the following information of a numeric attribute:

- count: the number of non-NULL values
- mean: the average value
- min: the minimum value
- max: the maximum value

The command shows the following information of other attributes:

- count: the number of non-NULL values
- unique: the number of unique values
- top: the most frequently occurring value
- freq: the number of occurrences of the top value

Examples
^^^^^^^^

.. code-block:: coffeescript
# showing information like unique count of src_port
nt = GET network-traffic FROM stixshifter://idsX WHERE dst_port = 80
DESCRIBE nt.src_port
SORT
----

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[build-system]
requires = ["setuptools >= 56.0.0", "wheel"]
requires = ["setuptools >= 68.2.2", "wheel"]
build-backend = "setuptools.build_meta"
11 changes: 6 additions & 5 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = kestrel-lang
version = 1.7.5
version = 1.7.6
description = Kestrel Threat Hunting Language
long_description = file:README.rst
long_description_content_type = text/x-rst
Expand All @@ -26,21 +26,22 @@ package_dir =
=src
scripts =
bin/kestrel
bin/ikestrel
bin/stix-shifter-diag
python_requires = >= 3.8
install_requires =
typeguard>=4.1.3
typeguard>=4.1.5
pyyaml>=6.0.1
lxml>=4.9.3
lark>=1.1.7
pandas>=2.0.0
pandas>=2.0.3
pyarrow>=13.0.0
docker>=6.1.3
requests>=2.31.0
nest-asyncio>=1.5.7
nest-asyncio>=1.5.8
stix-shifter==6.2.1
stix-shifter-utils==6.2.1
firepit>=2.3.27
firepit>=2.3.29
tests_require =
pytest

Expand Down
64 changes: 63 additions & 1 deletion src/kestrel/codegen/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,18 @@

from firepit.deref import auto_deref
from firepit.exceptions import InvalidAttr
from firepit.query import Limit, Offset, Order, Predicate, Projection, Query
from firepit.query import (
Aggregation,
Column,
Limit,
Group,
Offset,
Order,
Predicate,
Projection,
Query,
Table,
)
from firepit.stix20 import summarize_pattern

from kestrel.utils import remove_empty_dicts, dedup_ordered_dicts
Expand Down Expand Up @@ -223,6 +234,57 @@ def disp(stmt, session):
return None, DisplayDataframe(dedup_ordered_dicts(remove_empty_dicts(content)))


# Execute the ``DESCRIBE`` command: collect descriptive statistics for one
# attribute of a Kestrel variable and return them as a display object.
#
# ``stmt`` provides the variable name under "input" and the attribute name
# under "attribute"; ``session`` provides the symbol table and the store.
# Returns ``(None, DisplayDict)``: no output variable, only a display object.
#
# NOTE(review): indentation was lost in this view, so it is not visible
# whether the first run_query and the "top"/"freq" query below belong to the
# ``else:`` branch. The command documentation lists top/freq only for
# non-numeric attributes -- confirm the nesting against the real file.
@_debug_logger
@_skip_command_if_empty_input
def describe(stmt, session):
# Table backing the input variable, and the attribute to describe.
entity_table = session.symtable[stmt["input"]].entity_table
attribute = stmt["attribute"]
# Map column name -> column type so the attribute's type can be looked up.
schema = {i["name"]: i["type"] for i in session.store.schema(entity_table)}
attr_type = schema[attribute].lower()

# Statistics are accumulated here in insertion order.
result = OrderedDict()

qry = Query(entity_table)
# Numeric column types get count/mean/min/max.
if attr_type in ("integer", "bigint", "numeric"):
qry.append(
Aggregation(
[
("COUNT", attribute, "count"),
("AVG", attribute, "mean"),
("MIN", attribute, "min"),
("MAX", attribute, "max"),
]
)
)
# Every other column type gets count and number of unique values.
else:
qry.append(
Aggregation(
[("COUNT", attribute, "count"), ("NUNIQUE", attribute, "unique")]
)
)
# The aggregation yields a single row; merge it into the result.
cursor = session.store.run_query(qry)
content = cursor.fetchall()[0]
result.update(content)

# Need second query for top and freq
# Group rows by the attribute (aliased "top"), count each group as "freq",
# and keep only the group with the highest count.
qry = Query(
[
Table(entity_table),
Group([Column(attribute, alias="top")]),
Aggregation([("COUNT", "*", "freq")]),
Order([("freq", Order.DESC)]),
Limit(1),
]
)

# Run whichever query ``qry`` currently refers to and merge its first row.
cursor = session.store.run_query(qry)
content = cursor.fetchall()[0]

result.update(content)

return None, DisplayDict(result)


@_debug_logger
@_default_output
@_skip_command_if_empty_input
Expand Down
5 changes: 5 additions & 0 deletions src/kestrel/syntax/kestrel.lark
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ assignment: VARIABLE "=" expression
| disp
| info
| save
| describe

//
// All commands
Expand Down Expand Up @@ -58,6 +59,8 @@ info: "INFO"i VARIABLE

save: "SAVE"i VARIABLE "TO"i stdpath

describe: "DESCRIBE"i var_attr

//
// Variable definition
//
Expand Down Expand Up @@ -255,6 +258,8 @@ literal_list: "(" literal ("," literal)* ")"

reference_or_simple_string: ECNAME ("." ATTRIBUTE)?

var_attr: ECNAME "." ATTRIBUTE

?string: advanced_string

number: NUMBER
Expand Down
8 changes: 8 additions & 0 deletions src/kestrel/syntax/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ def disp(self, args):
packet["attrs"] = "*"
return packet

def describe(self, args):
    """Build the statement packet for a ``DESCRIBE`` command.

    ``args[0]`` is the mapping produced for the ``var_attr`` rule; its
    entries are merged into the packet after the ``"command"`` key.
    """
    return {"command": "describe", **args[0]}

def get(self, args):
packet = {
"command": "get",
Expand Down Expand Up @@ -272,6 +277,9 @@ def literal_list(self, args):
def literal(self, args):
return args[0]

def var_attr(self, args):
    """Turn a ``variable.attribute`` reference into a dict.

    The first parsed item becomes ``"input"`` and the second becomes
    ``"attribute"``.
    """
    variable = _first(args)
    attribute = _second(args)
    return {"input": variable, "attribute": attribute}

def reference_or_simple_string(self, args):
if len(args) > 1:
variable = _first(args)
Expand Down
Loading

0 comments on commit 3138950

Please sign in to comment.