diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index 91a98060..255e3bc2 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -3,14 +3,12 @@ name: Code Coverage Evaluation on PR on: push: branches: - - develop - - develop_* + - develop_v1 paths: - 'packages/*/src/**' pull_request: branches: - - develop - - develop_* + - develop_v1 paths: - 'packages/*/src/**' types: diff --git a/.github/workflows/code-style.yml b/.github/workflows/code-style.yml index 6c4e59e7..8860bae7 100644 --- a/.github/workflows/code-style.yml +++ b/.github/workflows/code-style.yml @@ -3,14 +3,12 @@ name: Code style compliance check on: push: branches: - - develop - - develop_* + - develop_v1 paths: - 'packages/*/src/**' pull_request: branches: - - develop - - develop_* + - develop_v1 paths: - 'packages/*/src/**' types: diff --git a/.github/workflows/integration-testing.yml b/.github/workflows/integration-testing.yml index 0e4d4a0e..74824757 100644 --- a/.github/workflows/integration-testing.yml +++ b/.github/workflows/integration-testing.yml @@ -2,17 +2,13 @@ name: Integration Testing With stix-shifter and Live Data Sources on: push: branches: - - develop - - develop_* - - release + - develop_v1 paths: - 'packages/*/src/**' - 'pyproject.toml' pull_request: branches: - - develop - - develop_* - - release + - develop_v1 paths: - 'packages/*/src/**' - 'pyproject.toml' diff --git a/.github/workflows/stixshifter-module-verification.yml b/.github/workflows/stixshifter-module-verification.yml deleted file mode 100644 index 66949595..00000000 --- a/.github/workflows/stixshifter-module-verification.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Daily STIX-shifter Connector Package Verification Test - -on: - schedule: - # Run this once per day, towards the end of the day for keeping the most - # recent data point most meaningful (hours are interpreted in UTC). - - cron: "55 02 * * *" - workflow_dispatch: # Allow for running this manually. - -jobs: - verify-stixshifter: - runs-on: ubuntu-latest - defaults: - run: - shell: bash - working-directory: ./packages/kestrel_datasource_stixshifter - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install Python Tools - run: pip install --upgrade pip setuptools wheel - - name: Install kestrel_core - working-directory: ./packages/kestrel_core - run: pip install . 
- - name: Install kestrel_datasource_stixshifter - run: pip install .[test] - - name: Sample STIX-shifter Connector Package Verification on PyPI - run: pytest -vv tests/test_stixshifter.py -k test_verify_package_origin diff --git a/.github/workflows/unit-testing-kestrel2.yml b/.github/workflows/unit-testing-kestrel2.yml deleted file mode 100644 index 4113a1e1..00000000 --- a/.github/workflows/unit-testing-kestrel2.yml +++ /dev/null @@ -1,69 +0,0 @@ -name: Unit testing on PR - -on: - push: - branches: - - develop - - develop_* - paths: - - 'packages-nextgen/**' - pull_request: - branches: - - develop - - develop_* - paths: - - 'packages-nextgen/**' - types: - - opened - - reopened - - synchronize - -jobs: - test-kestrel-core: - strategy: - matrix: - os: [ubuntu-latest, macos-latest] - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] - runs-on: ${{ matrix.os }} - defaults: - run: - shell: bash - working-directory: ./packages-nextgen/kestrel_core - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install Python Tools - run: pip install --upgrade pip setuptools wheel pytest - - name: Install kestrel_core - run: pip install . - - name: Unit testing - run: pytest -vv - - test-kestrel-interface-opensearch: - strategy: - matrix: - os: [ubuntu-latest, macos-latest] - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] - runs-on: ${{ matrix.os }} - defaults: - run: - shell: bash - working-directory: ./packages-nextgen/kestrel_interface_opensearch - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install Python Tools - run: pip install --upgrade pip setuptools wheel pytest - - name: Install kestrel_core - working-directory: ./packages-nextgen/kestrel_core - run: pip install . - - name: Install kestrel_interface_opensearch - run: pip install . - - name: Unit testing - run: pytest -vv diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml index 8af6b843..d23c33b1 100644 --- a/.github/workflows/unit-testing.yml +++ b/.github/workflows/unit-testing.yml @@ -3,14 +3,12 @@ name: Unit testing on PR on: push: branches: - - develop - - develop_* + - develop_v1 paths: - 'packages/**' pull_request: branches: - - develop - - develop_* + - develop_v1 paths: - 'packages/**' types: diff --git a/.github/workflows/unused-import.yml b/.github/workflows/unused-import.yml index e1174ba5..1ef42972 100644 --- a/.github/workflows/unused-import.yml +++ b/.github/workflows/unused-import.yml @@ -3,14 +3,12 @@ name: Unused imports check on: push: branches: - - develop - - develop_* + - develop_v1 paths: - 'packages/*/src/**' pull_request: branches: - - develop - - develop_* + - develop_v1 paths: - 'packages/*/src/**' types: diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3fd92963..eebfd6a2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,26 @@ The format is based on `Keep a Changelog`_. 
Unreleased ========== +1.8.5 (2024-05-01) +================== + +Added +----- + +- cli/diag: add start/stop/last options +- subquery generation support in stix-shifter interface based on specified time window +- configuration doc on `subquery_time_window` + +Changed +------- + +- cli/diag: change default timeframe to last 5 minutes + +Fixed +----- + +- Repeated queries when stix-shifter pagination is off + 1.8.4 (2024-04-23) ================== diff --git a/packages-nextgen/kestrel_core/README.rst b/packages-nextgen/kestrel_core/README.rst deleted file mode 120000 index c768ff7d..00000000 --- a/packages-nextgen/kestrel_core/README.rst +++ /dev/null @@ -1 +0,0 @@ -../../README.rst \ No newline at end of file diff --git a/packages-nextgen/kestrel_core/pyproject.toml b/packages-nextgen/kestrel_core/pyproject.toml deleted file mode 100644 index e57a5bca..00000000 --- a/packages-nextgen/kestrel_core/pyproject.toml +++ /dev/null @@ -1,61 +0,0 @@ -[build-system] -requires = ["setuptools >= 68.2.2", "wheel"] -build-backend = "setuptools.build_meta" - -[project] -name = "kestrel_core" -version = "2.0.0" -description = "Kestrel Threat Hunting Language" -readme = "README.rst" -requires-python = ">=3.8" -license = {text = "Apache 2.0 License"} -maintainers = [ - {name = "Xiaokui Shu", email = "xiaokui.shu@ibm.com"}, - {name = "Paul Coccoli", email = "pcoccoli@us.ibm.com"}, -] -keywords = [ - "kestrel", - "language", - "DSL", - "cybersecurity", - "threat hunting", - "huntflow", - "entity", -] -classifiers = [ - "Topic :: Security", - "Operating System :: OS Independent", - "Development Status :: 4 - Beta", - "Programming Language :: Python :: 3", -] - -dependencies = [ - "typeguard>=4.1.5", - "pyyaml>=6.0.1", - "lark>=1.1.7", - "pandas>=2.0.3", - "pyarrow>=13.0.0", - "mashumaro>=3.10", - "networkx>=3.1", # networkx==3.2.1 only for Python>=3.9 - "SQLAlchemy>=2.0.23", - "dpath>=2.1.6", -] - -[project.optional-dependencies] -dev = [ - "black", -] -test = [ - "pytest", -] - -[project.urls] -Homepage = "https://github.com/opencybersecurityalliance/kestrel-lang" -Documentation = "https://kestrel.readthedocs.io/" -Repository = "https://github.com/opencybersecurityalliance/kestrel-lang.git" - -[tool.setuptools.packages.find] -where = ["src"] - -[tool.setuptools.package-data] -"*" = ["*.lark", "*.yaml"] diff --git a/packages-nextgen/kestrel_core/src/kestrel/__future__.py b/packages-nextgen/kestrel_core/src/kestrel/__future__.py deleted file mode 100644 index efe66a26..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/__future__.py +++ /dev/null @@ -1,14 +0,0 @@ -import sys -from typeguard import typechecked - - -"""Entrance to invoke any backward compatibility patch - -This module is for developers to quickly locate backward compatibility pathes -in Kestrel code and remove them through time. 
-""" - - -@typechecked -def is_python_older_than_minor_version(minor: int) -> bool: - return sys.version_info.minor < minor diff --git a/packages-nextgen/kestrel_core/src/kestrel/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/__init__.py deleted file mode 100644 index 738b8b89..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from kestrel.session import Session diff --git a/packages-nextgen/kestrel_core/src/kestrel/cache/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/cache/__init__.py deleted file mode 100644 index 66614485..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/cache/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from kestrel.cache.base import AbstractCache -from kestrel.cache.inmemory import InMemoryCache -from kestrel.cache.sqlite import SqliteCache diff --git a/packages-nextgen/kestrel_core/src/kestrel/cache/base.py b/packages-nextgen/kestrel_core/src/kestrel/cache/base.py deleted file mode 100644 index 4d1a94bb..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/cache/base.py +++ /dev/null @@ -1,100 +0,0 @@ -from __future__ import annotations -from pandas import DataFrame -from typing import MutableMapping -from uuid import UUID -from abc import abstractmethod - -from kestrel.config.internal import CACHE_INTERFACE_IDENTIFIER -from kestrel.interface import AbstractInterface - - -class AbstractCache(AbstractInterface, MutableMapping): - """Base class for Kestrel cache - - Additional @abstractmethod from AbstractInterface: - - - evaluate_graph() - """ - - @staticmethod - def schemes() -> Iterable[str]: - return [CACHE_INTERFACE_IDENTIFIER] - - @abstractmethod - def __del__(self): - """Delete the cache and release memory/disk space""" - ... - - @abstractmethod - def __getitem__(self, instruction_id: UUID) -> DataFrame: - """Get the dataframe for the cached instruction - - This method will automatically support `uuid in cache` - - Parameters: - instruction_id: id of the instruction - - Returns: - dataframe of the given (likely Variable) instruction - """ - ... - - @abstractmethod - def __setitem__(self, instruction_id: UUID, data: DataFrame): - """Store the dataframe of an instruction into cache - - Parameters: - - instruction_id: id of the instruction - - data: data associated with the instruction - """ - ... - - @abstractmethod - def __delitem__(self, instruction_id: UUID): - """Delete cached item - - Parameters: - instruction_id: id of the instruction - """ - ... - - @abstractmethod - def get_virtual_copy(self) -> AbstractCache: - """Create a virtual cache object from this cache - - This method needs to reimplement __del__, __getitem__, __setitem__, - __delitem__ to not actually hit the store media, e.g., SQLite. - - The virtual cache is useful for the implementation of the Explain() - instruction, pretending the dependent graphs are evaluated, so the - evaluation can continue towards the Return() instruction. - - Because Python invokes special methods from class methods, replacing - the __getitem__, __setitem__, and __delitem__ in the object does not - help. It is better to derive a subclass and replace __class__ of the - object to the subclass to correctly invoke the new set of __xitem___. - - https://docs.python.org/3/reference/datamodel.html#special-lookup - - And Python garbage collector could clean up the virtual cache when - not in use, so the __del__ method should be reimplemented to make - sure the store media is not closed. - """ - ... 
- - def store(self, instruction_id: UUID, data: DataFrame): - self[instruction_id] = data - - def __iter__(self) -> UUID: - """Return UUIDs of instructions cached - - Returns: - UUIDs in iterator - """ - return iter(self.cache_catalog) - - def __len__(self) -> int: - """How many items are cached""" - return len(self.cache_catalog) diff --git a/packages-nextgen/kestrel_core/src/kestrel/cache/inmemory.py b/packages-nextgen/kestrel_core/src/kestrel/cache/inmemory.py deleted file mode 100644 index 87557222..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/cache/inmemory.py +++ /dev/null @@ -1,136 +0,0 @@ -from copy import copy -from pandas import DataFrame -from typeguard import typechecked -from uuid import UUID -from typing import ( - Mapping, - MutableMapping, - Optional, - Iterable, - Any, -) - -from kestrel.cache.base import AbstractCache -from kestrel.ir.graph import IRGraphEvaluable -from kestrel.display import GraphletExplanation, NativeQuery -from kestrel.ir.instructions import ( - Instruction, - Return, - Explain, - Variable, - Filter, - SourceInstruction, - TransformingInstruction, -) -from kestrel.interface.codegen.dataframe import ( - evaluate_source_instruction, - evaluate_transforming_instruction, -) - - -@typechecked -class InMemoryCache(AbstractCache): - def __init__( - self, - initial_cache: Mapping[UUID, DataFrame] = {}, - session_id: Optional[UUID] = None, - ): - super().__init__(session_id) - self.cache: MutableMapping[UUID, DataFrame] = {} - - # update() will call __setitem__() internally - self.update(initial_cache) - - def __del__(self): - del self.cache - - def __getitem__(self, instruction_id: UUID) -> DataFrame: - return self.cache[self.cache_catalog[instruction_id]] - - def __delitem__(self, instruction_id: UUID): - del self.cache[self.cache_catalog[instruction_id]] - del self.cache_catalog[instruction_id] - - def __setitem__( - self, - instruction_id: UUID, - data: DataFrame, - ): - self.cache_catalog[instruction_id] = instruction_id.hex - self.cache[self.cache_catalog[instruction_id]] = data - - def get_virtual_copy(self) -> AbstractCache: - v = copy(self) - v.cache_catalog = copy(self.cache_catalog) - v.__class__ = InMemoryCacheVirtual - return v - - def evaluate_graph( - self, - graph: IRGraphEvaluable, - instructions_to_evaluate: Optional[Iterable[Instruction]] = None, - ) -> Mapping[UUID, DataFrame]: - mapping = {} - if not instructions_to_evaluate: - instructions_to_evaluate = graph.get_sink_nodes() - for instruction in instructions_to_evaluate: - df = self._evaluate_instruction_in_graph(graph, instruction) - self[instruction.id] = df - mapping[instruction.id] = df - return mapping - - def explain_graph( - self, - graph: IRGraphEvaluable, - instructions_to_explain: Optional[Iterable[Instruction]] = None, - ) -> Mapping[UUID, GraphletExplanation]: - mapping = {} - if not instructions_to_evaluate: - instructions_to_evaluate = graph.get_sink_nodes() - for instruction in instructions_to_evaluate: - dep_graph = graph.duplicate_dependent_subgraph_of_node(instruction) - graph_dict = dep_graph.to_dict() - query = NativeQuery("DataFrame", "") - mapping[instruction.id] = GraphletExplanation(graph_dict, query) - return mapping - - def _evaluate_instruction_in_graph( - self, graph: IRGraphEvaluable, instruction: Instruction - ) -> DataFrame: - if instruction.id in self: - df = self[instruction.id] - elif isinstance(instruction, SourceInstruction): - df = evaluate_source_instruction(instruction) - elif isinstance(instruction, TransformingInstruction): - trunk, r2n 
= graph.get_trunk_n_branches(instruction) - df = self._evaluate_instruction_in_graph(graph, trunk) - if isinstance(instruction, (Return, Explain)): - pass - elif isinstance(instruction, Variable): - self[instruction.id] = df - else: - if isinstance(instruction, Filter): - # replace each ReferenceValue with a list of values - instruction.resolve_references( - lambda x: list( - self._evaluate_instruction_in_graph(graph, r2n[x]).iloc[ - :, 0 - ] - ) - ) - df = evaluate_transforming_instruction(instruction, df) - else: - raise NotImplementedError(f"Unknown instruction type: {instruction}") - return df - - -@typechecked -class InMemoryCacheVirtual(InMemoryCache): - def __getitem__(self, instruction_id: UUID) -> Any: - return self.cache_catalog[instruction_id] - - def __delitem__(self, instruction_id: UUID): - del self.cache_catalog[instruction_id] - - def __setitem__(self, instruction_id: UUID, data: Any): - self.cache_catalog[instruction_id] = "virtual" + instruction_id.hex diff --git a/packages-nextgen/kestrel_core/src/kestrel/cache/sqlite.py b/packages-nextgen/kestrel_core/src/kestrel/cache/sqlite.py deleted file mode 100644 index 97b8fb13..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/cache/sqlite.py +++ /dev/null @@ -1,191 +0,0 @@ -import logging -from copy import copy -from typing import Iterable, Mapping, Optional, Union, Any -from uuid import UUID - -import sqlalchemy -from dateutil.parser import parse as dt_parser -from pandas import DataFrame, read_sql -from typeguard import typechecked - -from kestrel.cache.base import AbstractCache -from kestrel.interface.codegen.sql import SqlTranslator -from kestrel.ir.graph import IRGraphEvaluable -from kestrel.display import GraphletExplanation, NativeQuery -from kestrel.ir.instructions import ( - Construct, - Instruction, - Return, - Explain, - Variable, - Filter, - SourceInstruction, - TransformingInstruction, - SolePredecessorTransformingInstruction, -) - -_logger = logging.getLogger(__name__) - - -@typechecked -class SqliteTranslator(SqlTranslator): - def __init__(self, from_obj: Union[SqlTranslator, str]): - if isinstance(from_obj, SqlTranslator): - fc = from_obj.query.subquery(name=from_obj.associated_variable) - else: # str to represent table name - fc = sqlalchemy.table(from_obj) - super().__init__( - sqlalchemy.dialects.sqlite.dialect(), dt_parser, "time", fc - ) # FIXME: need mapping for timestamp? - self.associated_variable = None - - -@typechecked -class SqliteCache(AbstractCache): - def __init__( - self, - initial_cache: Optional[Mapping[UUID, DataFrame]] = None, - session_id: Optional[UUID] = None, - ): - super().__init__() - - basename = session_id or "cache" - self.db_path = f"{basename}.db" - - # for an absolute file path, the three slashes are followed by the absolute path - # for a relative path, it's also three slashes? 
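To answer the question in the comment above: in SQLAlchemy sqlite URLs, sqlite:/// is always followed by the path, so a relative path does use three slashes, and an absolute path ends up with four in total. A quick sketch (file names are illustrative):

    import sqlalchemy

    rel = sqlalchemy.create_engine("sqlite:///cache.db")      # relative to CWD
    ab = sqlalchemy.create_engine("sqlite:////tmp/cache.db")  # absolute: three slashes + /tmp/...
    mem = sqlalchemy.create_engine("sqlite://")               # in-memory, no file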
- self.engine = sqlalchemy.create_engine(f"sqlite:///{self.db_path}") - self.connection = self.engine.connect() - - if initial_cache: - for instruction_id, data in initial_cache.items(): - self[instruction_id] = data - - def __del__(self): - self.connection.close() - - def __getitem__(self, instruction_id: UUID) -> DataFrame: - return read_sql(self.cache_catalog[instruction_id], self.connection) - - def __delitem__(self, instruction_id: UUID): - table_name = self.cache_catalog[instruction_id] - self.connection.execute(sqlalchemy.text(f'DROP TABLE "{table_name}"')) - del self.cache_catalog[instruction_id] - - def __setitem__( - self, - instruction_id: UUID, - data: DataFrame, - ): - table_name = instruction_id.hex - self.cache_catalog[instruction_id] = table_name - data.to_sql(table_name, con=self.connection, if_exists="replace", index=False) - - def get_virtual_copy(self) -> AbstractCache: - v = copy(self) - v.cache_catalog = copy(self.cache_catalog) - v.__class__ = SqliteCacheVirtual - return v - - def evaluate_graph( - self, - graph: IRGraphEvaluable, - instructions_to_evaluate: Optional[Iterable[Instruction]] = None, - ) -> Mapping[UUID, DataFrame]: - mapping = {} - if not instructions_to_evaluate: - instructions_to_evaluate = graph.get_sink_nodes() - for instruction in instructions_to_evaluate: - _logger.debug(f"evaluate instruction: {instruction}") - translator = self._evaluate_instruction_in_graph(graph, instruction) - # TODO: may catch error in case evaluation starts from incomplete SQL - _logger.debug(f"SQL query generated: {translator.result_w_literal_binds()}") - mapping[instruction.id] = read_sql(translator.result(), self.connection) - return mapping - - def explain_graph( - self, - graph: IRGraphEvaluable, - instructions_to_explain: Optional[Iterable[Instruction]] = None, - ) -> Mapping[UUID, GraphletExplanation]: - mapping = {} - if not instructions_to_explain: - instructions_to_explain = graph.get_sink_nodes() - for instruction in instructions_to_explain: - dep_graph = graph.duplicate_dependent_subgraph_of_node(instruction) - graph_dict = dep_graph.to_dict() - translator = self._evaluate_instruction_in_graph(graph, instruction) - query = NativeQuery("SQL", str(translator.result_w_literal_binds())) - mapping[instruction.id] = GraphletExplanation(graph_dict, query) - return mapping - - def _evaluate_instruction_in_graph( - self, - graph: IRGraphEvaluable, - instruction: Instruction, - ) -> SqliteTranslator: - if instruction.id in self: - # cached in sqlite - table_name = self.cache_catalog[instruction.id] - translator = SqliteTranslator(table_name) - - elif isinstance(instruction, SourceInstruction): - if isinstance(instruction, Construct): - # cache the data - self[instruction.id] = DataFrame(instruction.data) - # pull the data to start a SqliteTranslator - table_name = self.cache_catalog[instruction.id] - translator = SqliteTranslator(table_name) - else: - raise NotImplementedError(f"Unknown instruction type: {instruction}") - - elif isinstance(instruction, TransformingInstruction): - trunk, r2n = graph.get_trunk_n_branches(instruction) - translator = self._evaluate_instruction_in_graph(graph, trunk) - - if isinstance(instruction, SolePredecessorTransformingInstruction): - if isinstance(instruction, (Return, Explain)): - pass - elif isinstance(instruction, Variable): - # start a new translator and use previous one as subquery - # this allows using the variable as a dependent node - # if the variable is a sink, `SELECT * FROM (subquery)` also works - 
translator.associated_variable = instruction.name - translator = SqliteTranslator(translator) - else: - translator.add_instruction(instruction) - - elif isinstance(instruction, Filter): - # replace each ReferenceValue with a subquery - # note that this subquery will be used as a value for the .in_ operator - # we should not use .subquery() here but just `Select` class - # otherwise, will get warning: - # SAWarning: Coercing Subquery object into a select() for use in IN(); - # please pass a select() construct explicitly - instruction.resolve_references( - lambda x: self._evaluate_instruction_in_graph(graph, r2n[x]).query - ) - translator.add_instruction(instruction) - - else: - raise NotImplementedError(f"Unknown instruction type: {instruction}") - - else: - raise NotImplementedError(f"Unknown instruction type: {instruction}") - - return translator - - -@typechecked -class SqliteCacheVirtual(SqliteCache): - def __getitem__(self, instruction_id: UUID) -> Any: - return self.cache_catalog[instruction_id] - - def __delitem__(self, instruction_id: UUID): - del self.cache_catalog[instruction_id] - - def __setitem__(self, instruction_id: UUID, data: Any): - self.cache_catalog[instruction_id] = instruction_id.hex + "v" - - def __del__(self): - pass diff --git a/packages-nextgen/kestrel_core/src/kestrel/cli.py b/packages-nextgen/kestrel_core/src/kestrel/cli.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/src/kestrel/config/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/config/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/src/kestrel/config/internal.py b/packages-nextgen/kestrel_core/src/kestrel/config/internal.py deleted file mode 100644 index ed9fd2b1..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/config/internal.py +++ /dev/null @@ -1 +0,0 @@ -CACHE_INTERFACE_IDENTIFIER = "cache" diff --git a/packages-nextgen/kestrel_core/src/kestrel/config/kestrel.yaml b/packages-nextgen/kestrel_core/src/kestrel/config/kestrel.yaml deleted file mode 100644 index ccdd38b1..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/config/kestrel.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# syntax default values -language: - default_variable: "_" - default_sort_order: "desc" - default_datasource_schema: "stixshifter" - default_analytics_schema: "python" - -# how a Kestrel session is executed -session: - cache_directory_prefix: "kestrel-session-" # under system temp directory - local_database_path: "local.db" - log_path: "session.log" - show_execution_summary: true - -# whether/how to prefetch all records/observations for entities -prefetch: - - # enable/disable prefetch for command - # - # If prefetch is enabled, Kestrel will send additional queries to the data - # source to search for related records regarding entities retrieved from the - # user-specified pattern, collecting more complete information (attributes, - # connections to other entities) of the entities from different records. - switch_per_command: - get: true - find: true - - # declare the list of entity types to not prefetch - # - # This can be used when a user finds prefetch hinders the performance with - # large amount of results for one or more generic type of entities. For - # example, the data source may have millions of records containing - # `C:\Windows\SYSTEM32\ntdll.dll` touched by all Windows processes in a short - # amount of time. 
Executing a Kestrel command `f = FIND file LINKED p` will - # retrieve the file from a process and then start prefetch to gain - # information/connections of the file from all processes. Retrieval of - # millions records will likely result in a performance issue, thus the user - # can put `file` in this list to disable prefetch for it. - excluded_entities: - - - # - file - # - user-account - # - x-oca-asset - - # Detailed logic to identify the same process from different records is more - # complex than many data source query language can express, so Kestrel - # retrieves potential same process candidate records and perform fine-grained - # process identification in Kestrel with these parameters. - process_identification: - pid_but_name_changed_time_begin_offset: -5 # seconds - pid_but_name_changed_time_end_offset: 5 # seconds - pid_and_name_time_begin_offset: -3600 # seconds - pid_and_name_time_end_offset: 3600 # seconds - pid_and_ppid_time_begin_offset: -3600 # seconds - pid_and_ppid_time_end_offset: 3600 # seconds - pid_and_name_and_ppid_time_begin_offset: -86400 # seconds - pid_and_name_and_ppid_time_end_offset: 86400 # seconds - -# option when generating STIX query -stixquery: - timerange_start_offset: -300 # seconds - timerange_stop_offset: 300 # seconds - support_id: false # STIX 2.0 does not support unique ID - -# debug options -debug: - env_var: "KESTREL_DEBUG" # debug mode if the environment variable exists - cache_directory_prefix: "kestrel-" # under system temp directory - session_exit_marker: "session.exited" - maximum_exited_session: 3 diff --git a/packages-nextgen/kestrel_core/src/kestrel/config/utils.py b/packages-nextgen/kestrel_core/src/kestrel/config/utils.py deleted file mode 100644 index 0b912e7a..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/config/utils.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -import yaml -from pathlib import Path -import logging -from typeguard import typechecked -from typing import Mapping, Union - -from kestrel.utils import update_nested_dict, load_data_file - -CONFIG_DIR_DEFAULT = Path.home() / ".config" / "kestrel" -CONFIG_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "kestrel.yaml" -CONFIG_PATH_ENV_VAR = "KESTREL_CONFIG" # override CONFIG_PATH_DEFAULT if provided - -_logger = logging.getLogger(__name__) - - -@typechecked -def load_default_config() -> Mapping: - _logger.debug(f"Loading default config file...") - default_config = load_data_file("kestrel.config", "kestrel.yaml") - config_with_envvar_expanded = os.path.expandvars(default_config) - config_content = yaml.safe_load(config_with_envvar_expanded) - return config_content - - -@typechecked -def load_user_config( - config_path_env_var: str, config_path_default: Union[str, Path] -) -> Mapping: - config_path_default = config_path_default.absolute().as_posix() - config_path = os.getenv(config_path_env_var, config_path_default) - config_path = os.path.expanduser(config_path) - config = {} - if config_path: - try: - with open(config_path, "r") as fp: - _logger.debug(f"User configuration file found: {config_path}") - config = yaml.safe_load(os.path.expandvars(fp.read())) - except FileNotFoundError: - _logger.debug(f"User configuration file not exist.") - return config - - -@typechecked -def load_config() -> Mapping: - config_default = load_default_config() - config_user = load_user_config(CONFIG_PATH_ENV_VAR, CONFIG_PATH_DEFAULT) - _logger.debug(f"User configuration loaded: {config_user}") - _logger.debug(f"Updating default config with user config...") - return 
update_nested_dict(config_default, config_user) diff --git a/packages-nextgen/kestrel_core/src/kestrel/display.py b/packages-nextgen/kestrel_core/src/kestrel/display.py deleted file mode 100644 index e6729f85..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/display.py +++ /dev/null @@ -1,34 +0,0 @@ -from typing import List, Union, Mapping -from dataclasses import dataclass -from mashumaro.mixins.json import DataClassJSONMixin -from pandas import DataFrame - - -@dataclass -class NativeQuery(DataClassJSONMixin): - # which query language - language: str - # what query statement - statement: str - - -@dataclass -class GraphletExplanation(DataClassJSONMixin): - # serialized IRGraph - graph: Mapping - # data source query - query: NativeQuery - - -@dataclass -class GraphExplanation(DataClassJSONMixin): - graphlets: List[GraphletExplanation] - - -# Kestrel Display Object -Display = Union[ - str, - dict, - DataFrame, - GraphExplanation, -] diff --git a/packages-nextgen/kestrel_core/src/kestrel/exceptions.py b/packages-nextgen/kestrel_core/src/kestrel/exceptions.py deleted file mode 100644 index cd088afe..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/exceptions.py +++ /dev/null @@ -1,120 +0,0 @@ -class KestrelError(Exception): - pass - - -class InstructionNotFound(KestrelError): - pass - - -class InvalidInstruction(KestrelError): - pass - - -class InvalidSeralizedGraph(KestrelError): - pass - - -class InvalidSeralizedInstruction(KestrelError): - pass - - -class InvalidDataSource(KestrelError): - pass - - -class VariableNotFound(KestrelError): - pass - - -class ReferenceNotFound(KestrelError): - pass - - -class DataSourceNotFound(KestrelError): - pass - - -class DuplicatedVariable(KestrelError): - pass - - -class DuplicatedReference(KestrelError): - pass - - -class DuplicatedDataSource(KestrelError): - pass - - -class DuplicatedSingletonInstruction(KestrelError): - pass - - -class MultiInterfacesInGraph(KestrelError): - pass - - -class MultiSourcesInGraph(KestrelError): - pass - - -class LargerThanOneIndegreeInstruction(KestrelError): - pass - - -class DanglingReferenceInFilter(KestrelError): - pass - - -class DanglingFilter(KestrelError): - pass - - -class DuplicatedReferenceInFilter(KestrelError): - pass - - -class MissingReferenceInFilter(KestrelError): - pass - - -class InvalidSerializedDatasourceInterfaceCacheCatalog(KestrelError): - pass - - -class InevaluableInstruction(KestrelError): - pass - - -class MappingParseError(KestrelError): - pass - - -class InterfaceNotFound(KestrelError): - pass - - -class IRGraphMissingNode(KestrelError): - pass - - -class InterfaceNotConfigured(KestrelError): - pass - - -class InvalidInterfaceImplementation(KestrelError): - pass - - -class ConflictingInterfaceScheme(KestrelError): - pass - - -class DataSourceError(KestrelError): - pass - - -class UnsupportedOperatorError(KestrelError): - """The data source doesn't support this operator""" - - pass diff --git a/packages-nextgen/kestrel_core/src/kestrel/frontend/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/frontend/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/src/kestrel/frontend/compile.py b/packages-nextgen/kestrel_core/src/kestrel/frontend/compile.py deleted file mode 100644 index cb1f897f..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/frontend/compile.py +++ /dev/null @@ -1,398 +0,0 @@ -# Lark Transformer - -import logging -from datetime import datetime, timedelta -from functools import reduce - -from 
dateutil.parser import parse as to_datetime -from lark import Transformer, Token -from typeguard import typechecked - -from kestrel.mapping.data_model import translate_comparison_to_ocsf -from kestrel.utils import unescape_quoted_string -from kestrel.ir.filter import ( - FExpression, - FComparison, - IntComparison, - FloatComparison, - StrComparison, - ListComparison, - RefComparison, - ReferenceValue, - MultiComp, - ListOp, - NumCompOp, - StrCompOp, - ExpOp, - BoolExp, - TimeRange, -) -from kestrel.ir.graph import ( - IRGraph, - compose, -) -from kestrel.ir.instructions import ( - Construct, - DataSource, - Filter, - Limit, - Offset, - ProjectAttrs, - ProjectEntity, - Reference, - Return, - Sort, - Variable, - Explain, -) -from kestrel.exceptions import IRGraphMissingNode - - -_logger = logging.getLogger(__name__) - - -DEFAULT_VARIABLE = "_" -DEFAULT_SORT_ORDER = "DESC" - - -@typechecked -def _unescape_quoted_string(s: str): - if s.startswith("r"): - return s[2:-1] - else: - return s[1:-1].encode("utf-8").decode("unicode_escape") - - -@typechecked -def _create_comp(field: str, op_value: str, value) -> FComparison: - # TODO: implement MultiComp - - if op_value in (ListOp.IN, ListOp.NIN): - op = ListOp - comp = RefComparison if isinstance(value, ReferenceValue) else ListComparison - elif isinstance(value, int): - op = NumCompOp - comp = IntComparison - elif isinstance(value, float): - op = NumCompOp - comp = FloatComparison - elif isinstance(value, ReferenceValue): - op = ListOp - op_value = ListOp.IN if op_value in (ListOp.IN, StrCompOp.EQ) else ListOp.NIN - comp = RefComparison - else: - op = StrCompOp - comp = StrComparison - return comp(field, op(op_value), value) - - -@typechecked -def _map_filter_exp( - entity_name: str, filter_exp: FExpression, property_map: dict -) -> FExpression: - if isinstance( - filter_exp, - (IntComparison, FloatComparison, StrComparison, ListComparison, RefComparison), - ): - # get the field - field = filter_exp.field - # add entity to field if it doesn't have one already - if ":" not in field: - field = f"{entity_name}:{field}" - # map field to new syntax (e.g. STIX to OCSF) - # TODO: ECS to OCSF? Would need to merge STIX and ECS data model maps. - map_result = translate_comparison_to_ocsf( - property_map, field, filter_exp.op, filter_exp.value - ) - # Build a MultiComp if field maps to several values - if len(map_result) > 1: - filter_exp = MultiComp( - ExpOp.OR, - [_create_comp(field, op, value) for field, op, value in map_result], - ) - elif len(map_result) == 1: # it maps to a single value - mapping = map_result[0] - _logger.debug("mapping = %s", mapping) - field = mapping[0] - prefix = f"{entity_name}." 
- if field.startswith(prefix): - # Need to prune the entity name - field = field[len(prefix) :] - filter_exp.field = field - filter_exp.op = mapping[1] - filter_exp.value = mapping[2] - else: # pass-through - pass - # TODO: for RefComparison, map the attribute in value (may not be possible here) - - elif isinstance(filter_exp, BoolExp): - # recursively map boolean expressions - filter_exp = BoolExp( - _map_filter_exp(entity_name, filter_exp.lhs, property_map), - filter_exp.op, - _map_filter_exp(entity_name, filter_exp.rhs, property_map), - ) - elif isinstance(filter_exp, MultiComp): - # normally, this should be unreachable - # if this becomes a valid case, we need to change - # the definition of MultiComp to accept a MultiComp - # in addition to Comparisons in its `comps` list - filter_exp = MultiComp( - filter_exp.op, - [_map_filter_exp(entity_name, x, property_map) for x in filter_exp.comps], - ) - return filter_exp - - -@typechecked -def _add_reference_branches_for_filter(graph: IRGraph, filter_node: Filter): - if filter_node not in graph: - raise IRGraphMissingNode("Internal error: filter node expected") - else: - for refvalue in filter_node.get_references(): - r = graph.add_node(Reference(refvalue.reference)) - p = graph.add_node(ProjectAttrs([refvalue.attribute]), r) - graph.add_edge(p, filter_node) - - -class _KestrelT(Transformer): - def __init__( - self, - default_variable=DEFAULT_VARIABLE, - default_sort_order=DEFAULT_SORT_ORDER, - token_prefix="", - entity_map={}, - property_map={}, - ): - # token_prefix is the modification by Lark when using `merge_transformers()` - self.default_variable = default_variable - self.default_sort_order = default_sort_order - self.token_prefix = token_prefix - self.entity_map = entity_map - self.property_map = property_map # TODO: rename to data_model_map? - super().__init__() - - def start(self, args): - return reduce(compose, args, IRGraph()) - - def statement(self, args): - return args[0] - - def assignment(self, args): - # TODO: move the var+var into expression in Lark - variable_node = Variable(args[0].value) - graph, root = args[1] - graph.add_node(variable_node, root) - return graph - - def expression(self, args): - # TODO: add more clauses than WHERE and ATTR - # TODO: think about order of clauses when turning into nodes - graph = IRGraph() - reference = graph.add_node(args[0]) - root = reference - if len(args) > 1: - for clause in args[1:]: - graph.add_node(clause, root) - root = clause - if isinstance(clause, Filter): - # this is where_clause - _add_reference_branches_for_filter(graph, clause) - return graph, root - - def vtrans(self, args): - if len(args) == 1: - return Reference(args[0].value) - else: - # TODO: transformer support - ... - - def new(self, args): - # TODO: use entity type - - graph = IRGraph() - if len(args) == 1: - # Try to get entity type from first entity - data = args[0] - else: - data = args[1] - data_node = Construct(data) - graph.add_node(data_node) - return graph, data_node - - def var_data(self, args): - if isinstance(args[0], Token): - # TODO - ... 
- else: - v = args[0] - return v - - def json_objs(self, args): - return args - - def json_obj(self, args): - return dict(args) - - def json_pair(self, args): - v = args[0].value - if "ESCAPED_STRING" in args[0].type: - v = unescape_quoted_string(v) - return v, args[1] - - def json_value(self, args): - v = args[0].value - if args[0].type == self.token_prefix + "ESCAPED_STRING": - v = unescape_quoted_string(v) - elif args[0].type == self.token_prefix + "NUMBER": - v = float(v) if "." in v else int(v) - return v - - def get(self, args): - graph = IRGraph() - entity_name = args[0].value - mapped_entity_name = self.entity_map.get(entity_name, entity_name) - - # prepare Filter node - filter_node = args[2] - filter_node.exp = _map_filter_exp( - args[0].value, filter_node.exp, self.property_map - ) - - # add basic Source and Filter nodes - source_node = graph.add_node(args[1]) - filter_node = graph.add_node(filter_node, source_node) - - # add reference nodes if used in Filter - _add_reference_branches_for_filter(graph, filter_node) - - projection_node = graph.add_node(ProjectEntity(mapped_entity_name), filter_node) - root = projection_node - if len(args) > 3: - for arg in args[3:]: - if isinstance(arg, TimeRange): - filter_node.timerange = args[3] - elif isinstance(arg, Limit): - root = graph.add_node(arg, projection_node) - return graph, root - - def where_clause(self, args): - exp = args[0] - return Filter(exp) - - def attr_clause(self, args): - attrs = args[0].split(",") - attrs = [attr.strip() for attr in attrs] - return ProjectAttrs(attrs) - - def sort_clause(self, args): - # args[0] is Token('BY', 'BY') - return Sort(*args[1:]) - - def expression_or(self, args): - return BoolExp(args[0], ExpOp.OR, args[1]) - - def expression_and(self, args): - return BoolExp(args[0], ExpOp.AND, args[1]) - - def comparison_std(self, args): - """Emit a Comparison object for a Filter""" - field = args[0].value - op = args[1] - value = args[2] - comp = _create_comp(field, op, value) - return comp - - def op(self, args): - """Convert operator token to a plain string""" - return " ".join([arg.upper() for arg in args]) - - def op_keyword(self, args): - """Convert operator token to a plain string""" - return args[0].value - - # Literals - def advanced_string(self, args): - value = _unescape_quoted_string(args[0].value) - return value - - def reference_or_simple_string(self, args): - vname = args[0].value - attr = args[1].value if len(args) > 1 else None - return ReferenceValue(vname, attr) - - def number(self, args): - v = args[0].value - try: - return int(v) - except ValueError: - return float(v) - - def value(self, args): - return args[0] - - def literal_list(self, args): - return args - - def literal(self, args): - return args[0] - - def datasource(self, args): - return DataSource(args[0].value) - - # Timespans - def timespan_relative(self, args): - num = int(args[0]) - unit = args[1] - if unit == "DAY": - delta = timedelta(days=num) - elif unit == "HOUR": - delta = timedelta(hours=num) - elif unit == "MINUTE": - delta = timedelta(minutes=num) - elif unit == "SECOND": - delta = timedelta(seconds=num) - stop = datetime.utcnow() - start = stop - delta - return TimeRange(start, stop) - - def timespan_absolute(self, args): - start = to_datetime(args[0]) - stop = to_datetime(args[1]) - return TimeRange(start, stop) - - def day(self, _args): - return "DAY" - - def hour(self, _args): - return "HOUR" - - def minute(self, _args): - return "MINUTE" - - def second(self, _args): - return "SECOND" - - def timestamp(self, 
args): - return args[0] - - # Limit - def limit_clause(self, args): - n = int(args[0]) - return Limit(n) - - def offset_clause(self, args): - n = int(args[0]) - return Offset(n) - - def disp(self, args): - graph, root = args[0] - graph.add_node(Return(), root) - return graph - - def explain(self, args): - graph = IRGraph() - reference = graph.add_node(Reference(args[0].value)) - explain = graph.add_node(Explain(), reference) - graph.add_node(Return(), explain) - return graph diff --git a/packages-nextgen/kestrel_core/src/kestrel/frontend/completer.py b/packages-nextgen/kestrel_core/src/kestrel/frontend/completer.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/src/kestrel/frontend/kestrel.lark b/packages-nextgen/kestrel_core/src/kestrel/frontend/kestrel.lark deleted file mode 100644 index 1e00bfc9..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/frontend/kestrel.lark +++ /dev/null @@ -1,302 +0,0 @@ -// -// Kestrel Grammar -// - -// -// A huntflow is a sequence of statements -// - -start: statement* - -statement: assignment - | command_no_result - -// If no VARIABLE is given, default to _ in post-parsing -// For assign or merge, the result variable is required -// This eliminates meaningless huntflows like `var1 var2 var3` -assignment: VARIABLE "=" expression - | VARIABLE "=" VARIABLE ("+" VARIABLE)+ - | (VARIABLE "=")? command_with_result - -// "?" at the beginning will inline command -?command_with_result: find - | get - | group - | join - | load - | new - | sort - -?command_no_result: apply - | explain - | describe - | disp - | info - | save - -// -// All commands -// - -find: "FIND"i ENTITY_TYPE RELATION (REVERSED)? VARIABLE where_clause? timespan? limit_clause? - -get: "GET"i ENTITY_TYPE ("FROM"i datasource)? where_clause timespan? limit_clause? - -group: "GROUP"i VARIABLE BY grp_spec ("WITH"i agg_list)? - -join: "JOIN"i VARIABLE "," VARIABLE (BY ATTRIBUTE "," ATTRIBUTE)? - -load: "LOAD"i stdpath ("AS"i ENTITY_TYPE)? - -new: "NEW"i ENTITY_TYPE? var_data - -sort: "SORT"i VARIABLE BY ATTRIBUTE (ASC|DESC)? - -apply: "APPLY"i analytics_uri "ON"i variables ("WITH"i args)? - -disp: "DISP"i expression - -info: "INFO"i VARIABLE - -save: "SAVE"i VARIABLE "TO"i stdpath - -describe: "DESCRIBE"i var_attr - -explain: "EXPLAIN"i VARIABLE - -// -// Variable definition -// - -variables: VARIABLE ("," VARIABLE)* - -VARIABLE: CNAME - -// -// Expression -// - -expression: vtrans where_clause? attr_clause? sort_clause? limit_clause? offset_clause? - -// not use rule name `transform` since it is a special function in Lark -// the function in transformer will mal-function in `merge_transformers()` -vtrans: transformer "(" VARIABLE ")" - | VARIABLE - -transformer: TIMESTAMPED - | ADDOBSID - | RECORDS - -TIMESTAMPED: "TIMESTAMPED"i -ADDOBSID: "ADDOBSID"i -RECORDS: "RECORDS"i - -where_clause: "WHERE"i ecg_pattern -attr_clause: "ATTR"i ATTRIBUTES -sort_clause: "SORT"i BY ATTRIBUTE (ASC|DESC)? 
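// Illustrative statement (variable and attribute names are hypothetical)
// exercising the optional clauses in the order the `expression` rule above
// requires -- WHERE, then ATTR, SORT, LIMIT, OFFSET:
//
//   DISP procs WHERE name = 'cmd.exe' ATTR name, pid SORT BY pid DESC LIMIT 10 OFFSET 5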
-limit_clause: "LIMIT"i INT -offset_clause: "OFFSET"i INT - -?ecg_pattern: disjunction - | "[" disjunction "]" // STIX compatible - -?disjunction: conjunction - | disjunction "OR"i conjunction -> expression_or - -?conjunction: comparison - | conjunction "AND"i comparison -> expression_and - -?comparison: comparison_std - | comparison_null - | "(" disjunction ")" - -comparison_std: ENTITY_ATTRIBUTE_PATH op value -comparison_null: ENTITY_ATTRIBUTE_PATH null_op NULL - -// -// Timespan -// - -?timespan: "start"i timestamp "stop"i timestamp -> timespan_absolute - | "last"i INT timeunit -> timespan_relative - -?timeunit: day - | hour - | minute - | second - -day: "days"i | "day"i | "d"i -hour: "hours"i | "hour"i | "h"i -minute: "minutes"i | "minute"i | "m"i -second: "seconds"i | "second"i | "s"i - -timestamp: ISOTIMESTAMP - | "\"" ISOTIMESTAMP "\"" - | "'" ISOTIMESTAMP "'" - | "t\"" ISOTIMESTAMP "\"" - | "t'" ISOTIMESTAMP "'" - -ISOTIMESTAMP: /\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d(\.\d+)?Z/ - -// -// FIND command constructs -// - -RELATION: WORD - -// -// GROUP command constructs -// - -grp_spec: grp_expr ("," grp_expr)* - -grp_expr: ATTRIBUTE - | bin_func - -// No other scalar funcs are supported yet -bin_func: "BIN"i "(" ATTRIBUTE "," INT timeunit? ")" - -agg_list: agg ("," agg)* - -agg: funcname "(" ATTRIBUTE ")" ("AS"i alias)? - -?funcname: (MIN|MAX|SUM|AVG|COUNT|NUNIQUE) -MIN: "MIN"i -MAX: "MAX"i -SUM: "SUM"i -AVG: "AVG"i -COUNT: "COUNT"i -NUNIQUE: "NUNIQUE"i - -?alias: ECNAME - -// -// GET command constructs -// - -datasource: DATASRC_SIMPLE - | DATASRC_ESCAPED - | VARIABLE - -DATASRC_SIMPLE: PATH_SIMPLE ("," PATH_SIMPLE)* -DATASRC_ESCAPED: PATH_ESCAPED - -// -// APPLY command constructs -// - -analytics_uri: ANALYTICS_SIMPLE - | ANALYTICS_ESCAPED - -ANALYTICS_SIMPLE: PATH_SIMPLE -ANALYTICS_ESCAPED: PATH_ESCAPED - -// -// Two-level JSON in command NEW -// - -// use terminal to load the entire var_data without parsing into it -var_data: "[" (RAW_VALUES | json_objs) "]" - -RAW_VALUES: ESCAPED_STRING_WS ("," ESCAPED_STRING_WS)* - -json_objs: json_obj ("," json_obj)* -json_obj: "{" json_pair ("," json_pair)* "}" -json_pair: ESCAPED_STRING ":" json_value -json_value: (NUMBER|ESCAPED_STRING|TRUE|FALSE|NULL) - -// -// Arguments -// - -args: arg_kv_pair ("," arg_kv_pair)* - -arg_kv_pair: ECNAME "=" value - -// -// Shared keywords -// - -BY: "BY"i -ASC: "ASC"i -DESC: "DESC"i -REVERSED: "BY"i -TRUE: "TRUE"i -FALSE: "FALSE"i -NULL: "NULL"i -IN: "IN"i -LIKE: "LIKE"i -MATCHES: "MATCHES"i -IS: "IS"i -NOT: "NOT"i -ISSUBSET: "ISSUBSET"i -ISSUPERSET: "ISSUPERSET"i - -op: OP_SIGN - | NOT? op_keyword - -OP_SIGN: /([!=]?=|[<>]=?)/ - -op_keyword: IN - | LIKE - | MATCHES - | ISSUBSET - | ISSUPERSET - -null_op: IS NOT? - -// -// Common language constructs -// - -value: literal_list - | literal - -literal: reference_or_simple_string - | string - | number - -literal_list: "(" literal ("," literal)* ")" - | "[" literal ("," literal)* "]" - -reference_or_simple_string: ECNAME ("." ATTRIBUTE)? - -var_attr: ECNAME "." ATTRIBUTE - -?string: advanced_string - -number: NUMBER - -ENTITY_ATTRIBUTE_PATH: (ENTITY_TYPE ":")? ATTRIBUTE - -ENTITY_TYPE: ECNAME - -stdpath: PATH_SIMPLE - | PATH_ESCAPED - -// TODO: support attributes without quote for dash -// x.hash.SHA-256 instead of x.hash.'SHA-256' -ATTRIBUTE: ECNAME "[*]"? ("." ECNAME_W_QUOTE)* -ATTRIBUTES: ATTRIBUTE (WS* "," WS* ATTRIBUTE)* - -ECNAME: (LETTER|"_") (LETTER|DIGIT|"_"|"-")* -ECNAME_W_QUOTE: (LETTER|DIGIT|"_"|"-"|"'")+ - -PATH_SIMPLE: (ECNAME "://")? 
(LETTER|DIGIT|"_"|"-"|"."|"/")+ - -PATH_ESCAPED: "\"" (ECNAME "://")? _STRING_ESC_INNER "\"" - | "'" (ECNAME "://")? _STRING_ESC_INNER "'" - -ESCAPED_STRING: "\"" _STRING_ESC_INNER "\"" - | "'" _STRING_ESC_INNER "'" -ESCAPED_STRING_WS: WS* ESCAPED_STRING WS* - -// nearly Python string, but no [ubf]? as prefix options -// check Lark example of Python parser for reference -advanced_string: /(r?)("(?!"").*?(? COMMENT - -%ignore WS -%ignore COMMENT diff --git a/packages-nextgen/kestrel_core/src/kestrel/frontend/parser.py b/packages-nextgen/kestrel_core/src/kestrel/frontend/parser.py deleted file mode 100644 index 0ff482c5..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/frontend/parser.py +++ /dev/null @@ -1,70 +0,0 @@ -# parse Kestrel syntax, apply frontend mapping, transform to IR - -import logging -import os -from itertools import chain - -from kestrel.frontend.compile import _KestrelT -from kestrel.mapping.data_model import reverse_mapping -from kestrel.utils import load_data_file -from lark import Lark -from typeguard import typechecked -import yaml - - -_logger = logging.getLogger(__name__) - - -frontend_mapping = {} - - -@typechecked -def get_mapping(mapping_type: str, mapping_package: str, mapping_filepath: str) -> dict: - global frontend_mapping - mapping = frontend_mapping.get(mapping_type) - if mapping is not None: - return mapping - try: - mapping_str = load_data_file(mapping_package, mapping_filepath) - mapping = yaml.safe_load(mapping_str) - if mapping_type == "property": - # New data model map is always OCSF->native - mapping = reverse_mapping(mapping) - frontend_mapping[mapping_type] = mapping - except Exception as ex: - _logger.error("Failed to load %s", mapping_str, exc_info=ex) - mapping = None # FIXME: this is not a dict - return mapping - - -@typechecked -def get_keywords(): - # TODO: this Kestrel1 code needs to be updated - grammar = load_data_file("kestrel.frontend", "kestrel.lark") - parser = Lark(grammar, parser="lalr") - alphabet_patterns = filter(lambda x: x.pattern.value.isalnum(), parser.terminals) - # keywords = [x.pattern.value for x in alphabet_patterns] + all_relations - keywords = [x.pattern.value for x in alphabet_patterns] - keywords_lower = map(lambda x: x.lower(), keywords) - keywords_upper = map(lambda x: x.upper(), keywords) - keywords_comprehensive = list(chain(keywords_lower, keywords_upper)) - return keywords_comprehensive - - -# Create a single, reusable transformer -_parser = Lark( - load_data_file("kestrel.frontend", "kestrel.lark"), - parser="lalr", - transformer=_KestrelT( - entity_map=get_mapping( - "entity", "kestrel.mapping", os.path.join("entityname", "stix.yaml") - ), - property_map=get_mapping( - "property", "kestrel.mapping", os.path.join("entityattribute", "stix.yaml") - ), - ), -) - - -def parse_kestrel(stmts): - return _parser.parse(stmts) diff --git a/packages-nextgen/kestrel_core/src/kestrel/interface/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/interface/__init__.py deleted file mode 100644 index 3c4b25e5..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/interface/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from kestrel.interface.base import AbstractInterface -from kestrel.interface.manager import InterfaceManager diff --git a/packages-nextgen/kestrel_core/src/kestrel/interface/base.py b/packages-nextgen/kestrel_core/src/kestrel/interface/base.py deleted file mode 100644 index 50f5601f..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/interface/base.py +++ /dev/null @@ -1,134 +0,0 @@ -import json 
-from abc import ABC, abstractmethod -from pandas import DataFrame -from uuid import UUID -from typing import ( - Mapping, - MutableMapping, - Optional, - Iterable, -) - -from kestrel.display import GraphletExplanation -from kestrel.ir.instructions import Instruction -from kestrel.ir.graph import IRGraphEvaluable -from kestrel.exceptions import ( - InvalidSerializedDatasourceInterfaceCacheCatalog, -) - - -MODULE_PREFIX = "kestrel_interface_" - - -class AbstractInterface(ABC): - """Abstract class for datasource/analytics interface - - Concepts: - - - Think an interface as a datalake - - - Think a datasource as a table in the datalake - - Attributes: - - session_id: the optional information to derive table name in datalake - - datasources: map a datasource name to datalake table name - - cache_catalog: map a cached item (instruction.id) to datalake table/view name - """ - - def __init__( - self, - serialized_cache_catalog: Optional[str] = None, - session_id: Optional[UUID] = None, - ): - self.session_id = session_id - self.cache_catalog: MutableMapping[UUID, str] = {} - - if serialized_cache_catalog: - try: - self.cache_catalog = json.loads(serialized_cache_catalog) - except: - raise InvalidSerializedDatasourceInterfaceCacheCatalog() - - # Python 3.13 will drop chain of @classmethod and @property - # use @staticmethod instead (cannot make it a property) - @staticmethod - @abstractmethod - def schemes() -> Iterable[str]: - """The schemes to specify the interface - - Each scheme should be defined as ``("_"|LETTER) ("_"|LETTER|DIGIT)*`` - """ - ... - - @abstractmethod - def store( - self, - instruction_id: UUID, - data: DataFrame, - ): - """Create a new table in the datalake from a dataframe - - The name of the table is a function of instruction_id (and session_id) - in case there are conflicting tables in the datalake. - - The function can be implemented as a hashtable. If the hash collides - with an existing hash, figure out whether the existing hash/table is - used by the current interface and session. If yes, then replace; if - not, then generate a new random value and record in self.cache_catalog. - - This method will update self.cache_catalog. - - Parameters: - - instruction_id: the key to be placed in `self.cache_catalog` - - data: the dataframe to store - """ - ... - - @abstractmethod - def evaluate_graph( - self, - graph: IRGraphEvaluable, - instructions_to_evaluate: Optional[Iterable[Instruction]] = None, - ) -> Mapping[UUID, DataFrame]: - """Evaluate the IRGraph - - Parameters: - - graph: The evaluate IRGraph - - instructions_to_evaluate: instructions to evaluate and return; by default, it will be all Return instructions in the graph - - Returns: - - DataFrames for each instruction in instructions_to_evaluate. - """ - ... - - @abstractmethod - def explain_graph( - self, - graph: IRGraphEvaluable, - instructions_to_explain: Optional[Iterable[Instruction]] = None, - ) -> Mapping[UUID, GraphletExplanation]: - """Explain how to evaluate the IRGraph - - Parameters: - - graph: The evaluable IRGraph - - instructions_to_explain: instructions to explain and return; by default, it will be all Return instructions in the graph - - Returns: - - GraphletExplanation (a Kestrel Display object) for each instruction in instructions_to_explain. - """ - ... 
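A hedged sketch of what a concrete interface might look like under the contract above (the interface name and scheme are hypothetical): implement schemes() plus the three abstract methods, deriving table names from the instruction UUID as the store() docstring suggests.

    class MyLakeInterface(AbstractInterface):
        @staticmethod
        def schemes():
            return ["mylake"]  # must match ("_"|LETTER) ("_"|LETTER|DIGIT)*

        def store(self, instruction_id, data):
            table_name = instruction_id.hex  # derive the table name from the UUID
            self.cache_catalog[instruction_id] = table_name
            ...  # write `data` to the datalake table

        def evaluate_graph(self, graph, instructions_to_evaluate=None):
            ...  # translate the IRGraph to native queries, return DataFrames

        def explain_graph(self, graph, instructions_to_explain=None):
            ...  # same traversal, but return GraphletExplanation objects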
- - def cache_catalog_to_json(self) -> str: - """Serialize the cache catalog to a JSON string""" - return json.dumps(self.cache_catalog) diff --git a/packages-nextgen/kestrel_core/src/kestrel/interface/codegen/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/interface/codegen/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/src/kestrel/interface/codegen/dataframe.py b/packages-nextgen/kestrel_core/src/kestrel/interface/codegen/dataframe.py deleted file mode 100644 index 21ed706e..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/interface/codegen/dataframe.py +++ /dev/null @@ -1,143 +0,0 @@ -import sys -import inspect -import re -import operator -import functools -from typeguard import typechecked -from pandas import DataFrame, Series -from typing import Callable - -from kestrel.ir.instructions import ( - SourceInstruction, - TransformingInstruction, - Construct, - Limit, - ProjectAttrs, - ProjectEntity, - Filter, -) -from kestrel.ir.filter import ( - FExpression, - BoolExp, - MultiComp, - StrCompOp, - NumCompOp, - ExpOp, - ListOp, -) - - -@typechecked -def evaluate_source_instruction(instruction: SourceInstruction) -> DataFrame: - eval_func = _select_eval_func(instruction.instruction) - return eval_func(instruction) - - -@typechecked -def evaluate_transforming_instruction( - instruction: TransformingInstruction, dataframe: DataFrame -) -> DataFrame: - eval_func = _select_eval_func(instruction.instruction) - return eval_func(instruction, dataframe) - - -@typechecked -def _select_eval_func(instruction_name: str) -> Callable: - eval_funcs = inspect.getmembers(sys.modules[__name__], inspect.isfunction) - try: - _funcs = filter(lambda x: x[0] == "_eval_" + instruction_name, eval_funcs) - return next(_funcs)[1] - except StopIteration: - raise NotImplementedError( - f"evaluation function for {instruction_name} in dataframe cache" - ) - - -@typechecked -def _eval_Construct(instruction: Construct) -> DataFrame: - return DataFrame(instruction.data) - - -@typechecked -def _eval_Limit(instruction: Limit, dataframe: DataFrame) -> DataFrame: - return dataframe.head(instruction.num) - - -@typechecked -def _eval_ProjectAttrs(instruction: ProjectAttrs, dataframe: DataFrame) -> DataFrame: - return dataframe[instruction.attrs] - - -@typechecked -def _eval_ProjectEntity(instruction: ProjectEntity, dataframe: DataFrame) -> DataFrame: - # TODO - ... 
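A usage sketch of the reflection-based dispatch above (assuming Limit is constructed with its row count, as elsewhere in this package): _select_eval_func resolves "_eval_" + instruction.instruction within this module, so evaluating a Limit instruction ends up in _eval_Limit.

    from pandas import DataFrame
    from kestrel.ir.instructions import Limit

    df = DataFrame({"pid": [1, 2, 3]})
    assert evaluate_transforming_instruction(Limit(2), df).equals(df.head(2))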
-
-
-@typechecked
-def _eval_Filter(instruction: Filter, dataframe: DataFrame) -> DataFrame:
-    return dataframe[_eval_Filter_exp(instruction.exp, dataframe)]
-
-
-@typechecked
-def _eval_Filter_exp(exp: FExpression, dataframe: DataFrame) -> Series:
-    if isinstance(exp, BoolExp):
-        bs = _eval_Filter_exp_BoolExp(exp, dataframe)
-    elif isinstance(exp, MultiComp):
-        bss = [_eval_Filter_exp(comp, dataframe) for comp in exp.comps]
-        if exp.op == ExpOp.AND:
-            bs = functools.reduce(lambda x, y: x & y, bss)
-        elif exp.op == ExpOp.OR:
-            bs = functools.reduce(lambda x, y: x | y, bss)
-        else:
-            raise NotImplementedError("unknown kestrel.ir.filter.ExpOp type")
-    else:
-        bs = _eval_Filter_exp_Comparison(exp, dataframe)
-    return bs
-
-
-@typechecked
-def _eval_Filter_exp_BoolExp(boolexp: BoolExp, dataframe: DataFrame) -> Series:
-    if boolexp.op == ExpOp.AND:
-        bs = _eval_Filter_exp(boolexp.lhs, dataframe) & _eval_Filter_exp(
-            boolexp.rhs, dataframe
-        )
-    elif boolexp.op == ExpOp.OR:
-        bs = _eval_Filter_exp(boolexp.lhs, dataframe) | _eval_Filter_exp(
-            boolexp.rhs, dataframe
-        )
-    else:
-        raise NotImplementedError("unknown kestrel.ir.filter.ExpOp type")
-    return bs
-
-
-@typechecked
-def _eval_Filter_exp_Comparison(
-    c: FExpression,
-    dataframe: DataFrame,
-) -> Series:
-    comp2func = {
-        NumCompOp.EQ: operator.eq,
-        NumCompOp.NEQ: operator.ne,
-        NumCompOp.LT: operator.gt,  # value first in functools.partial
-        NumCompOp.LE: operator.ge,  # value first in functools.partial
-        NumCompOp.GT: operator.lt,  # value first in functools.partial
-        NumCompOp.GE: operator.le,  # value first in functools.partial
-        StrCompOp.EQ: operator.eq,
-        StrCompOp.NEQ: operator.ne,
-        StrCompOp.LIKE: lambda w, x: bool(
-            re.search(w.replace(".", r"\.").replace("%", ".*?"), x)
-        ),
-        StrCompOp.NLIKE: lambda w, x: not bool(
-            re.search(w.replace(".", r"\.").replace("%", ".*?"), x)
-        ),
-        StrCompOp.MATCHES: lambda w, x: bool(re.search(w, x)),
-        StrCompOp.NMATCHES: lambda w, x: not bool(re.search(w, x)),
-        ListOp.IN: lambda w, x: x in w,
-        ListOp.NIN: lambda w, x: x not in w,
-    }
-
-    try:
-        return dataframe[c.field].apply(functools.partial(comp2func[c.op], c.value))
-    except KeyError:
-        raise NotImplementedError(f"unknown kestrel.ir.filter.*Op type: {c.op}")
diff --git a/packages-nextgen/kestrel_core/src/kestrel/interface/codegen/kql.py b/packages-nextgen/kestrel_core/src/kestrel/interface/codegen/kql.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/packages-nextgen/kestrel_core/src/kestrel/interface/codegen/sql.py b/packages-nextgen/kestrel_core/src/kestrel/interface/codegen/sql.py
deleted file mode 100644
index 75f97608..00000000
--- a/packages-nextgen/kestrel_core/src/kestrel/interface/codegen/sql.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import logging
-from functools import reduce
-from typing import Callable
-
-from sqlalchemy import and_, column, or_, select, FromClause, asc, desc
-from sqlalchemy.engine import Compiled, default
-from sqlalchemy.sql.elements import BinaryExpression, BooleanClauseList
-from sqlalchemy.sql.expression import ColumnClause, ColumnOperators
-from sqlalchemy.sql.selectable import Select
-from typeguard import typechecked
-
-from kestrel.ir.filter import (
-    BoolExp,
-    ExpOp,
-    FComparison,
-    ListOp,
-    MultiComp,
-    NumCompOp,
-    StrComparison,
-    StrCompOp,
-)
-from kestrel.ir.instructions import (
-    Filter,
-    Instruction,
-    Limit,
-    Offset,
-    ProjectAttrs,
-    ProjectEntity,
-    Sort,
-    SortDirection,
-)
-
-
-_logger = logging.getLogger(__name__)
-
-# SQLAlchemy comparison operator functions
-comp2func = {
- NumCompOp.EQ: ColumnOperators.__eq__, - NumCompOp.NEQ: ColumnOperators.__ne__, - NumCompOp.LT: ColumnOperators.__lt__, - NumCompOp.LE: ColumnOperators.__le__, - NumCompOp.GT: ColumnOperators.__gt__, - NumCompOp.GE: ColumnOperators.__ge__, - StrCompOp.EQ: ColumnOperators.__eq__, - StrCompOp.NEQ: ColumnOperators.__ne__, - StrCompOp.LIKE: ColumnOperators.like, - StrCompOp.NLIKE: ColumnOperators.not_like, - StrCompOp.MATCHES: ColumnOperators.regexp_match, - StrCompOp.NMATCHES: ColumnOperators.regexp_match, # Caller must negate - ListOp.IN: ColumnOperators.in_, - ListOp.NIN: ColumnOperators.not_in, -} - - -@typechecked -def _render_comp(comp: FComparison) -> BinaryExpression: - col: ColumnClause = column(comp.field) - if comp.op == StrCompOp.NMATCHES: - return ~comp2func[comp.op](col, comp.value) - return comp2func[comp.op](col, comp.value) - - -@typechecked -def _render_multi_comp(comps: MultiComp) -> BooleanClauseList: - op = and_ if comps.op == ExpOp.AND else or_ - return reduce(op, map(_render_comp, comps.comps)) - - -@typechecked -class SqlTranslator: - def __init__( - self, - dialect: default.DefaultDialect, - timefmt: Callable, - timestamp: str, - from_obj: FromClause, - ): - # SQLAlchemy Dialect object (e.g. from sqlalchemy.dialects import sqlite; sqlite.dialect()) - self.dialect = dialect - - # Time formatting function for datasource - self.timefmt = timefmt - - # Primary timestamp field in target table - self.timestamp = timestamp - - # SQLAlchemy statement object - self.query: Select = select("*").select_from(from_obj) - - def _render_exp(self, exp: BoolExp) -> BooleanClauseList: - if isinstance(exp.lhs, BoolExp): - lhs = self._render_exp(exp.lhs) - elif isinstance(exp.lhs, MultiComp): - lhs = _render_multi_comp(exp.lhs) - else: - lhs = _render_comp(exp.lhs) - if isinstance(exp.rhs, BoolExp): - rhs = self._render_exp(exp.rhs) - elif isinstance(exp.rhs, MultiComp): - rhs = _render_multi_comp(exp.rhs) - else: - rhs = _render_comp(exp.rhs) - return and_(lhs, rhs) if exp.op == ExpOp.AND else or_(lhs, rhs) - - def add_Filter(self, filt: Filter) -> None: - if filt.timerange.start: - # Convert the timerange to the appropriate pair of comparisons - start_comp = StrComparison( - self.timestamp, ">=", self.timefmt(filt.timerange.start) - ) - stop_comp = StrComparison( - self.timestamp, "<", self.timefmt(filt.timerange.stop) - ) - # AND them together - time_exp = BoolExp(start_comp, ExpOp.AND, stop_comp) - # AND that with any existing filter expression - exp = BoolExp(filt.exp, ExpOp.AND, time_exp) - else: - exp = filt.exp - if isinstance(exp, BoolExp): - comp = self._render_exp(exp) - elif isinstance(exp, MultiComp): - comp = _render_multi_comp(exp) - else: - comp = _render_comp(exp) - self.query = self.query.where(comp) - - def add_ProjectAttrs(self, proj: ProjectAttrs) -> None: - cols = [column(col) for col in proj.attrs] - self.query = self.query.with_only_columns(*cols) # TODO: mapping? - - def add_ProjectEntity(self, proj: ProjectEntity) -> None: - self.query = self.query.with_only_columns( - column(proj.entity_type) - ) # TODO: mapping? 
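For intuition on what `_render_comp` and the translator assemble: a `column()` clause plus a `ColumnOperators` method yields a WHERE expression that compiles per dialect. A standalone illustration using SQLAlchemy alone (toy table and column names, not the kestrel classes):

from sqlalchemy import column, select, table
from sqlalchemy.dialects import sqlite

# the same building blocks used above: column() + a ColumnOperators method
query = (
    select(column("name"), column("pid"))
    .select_from(table("processes"))
    .where(column("name").like("%.exe"))
)
print(query.compile(dialect=sqlite.dialect(), compile_kwargs={"literal_binds": True}))
# roughly: SELECT name, pid FROM processes WHERE name LIKE '%.exe'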
- - def add_Limit(self, lim: Limit) -> None: - self.query = self.query.limit(lim.num) - - def add_Offset(self, offset: Offset) -> None: - self.query = self.query.offset(offset.num) - - def add_Sort(self, sort: Sort) -> None: - col = column(sort.attribute) - order = asc(col) if sort.direction == SortDirection.ASC else desc(col) - self.query = self.query.order_by(order) - - def add_instruction(self, i: Instruction) -> None: - inst_name = i.instruction - method_name = f"add_{inst_name}" - method = getattr(self, method_name) - if not method: - raise NotImplementedError(f"SqlTranslator.{method_name}") - method(i) - - def result(self) -> Compiled: - # TODO: two projections, e.g., ProjectAttrs after ProjectEntity - return self.query.compile(dialect=self.dialect) - - def result_w_literal_binds(self) -> Compiled: - # full SQL query with literal binds showing, i.e., IN [99, 51], not IN [?, ?] - # this is for debug display, not used by an sqlalchemy driver to execute - return self.query.compile( - dialect=self.dialect, compile_kwargs={"literal_binds": True} - ) diff --git a/packages-nextgen/kestrel_core/src/kestrel/interface/manager.py b/packages-nextgen/kestrel_core/src/kestrel/interface/manager.py deleted file mode 100644 index b5fd0904..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/interface/manager.py +++ /dev/null @@ -1,112 +0,0 @@ -from __future__ import annotations -import importlib -import pkgutil -import logging -import inspect -import sys -import itertools -from copy import copy -from typeguard import typechecked -from typing import Mapping, Iterable, Type - -from kestrel.exceptions import ( - InterfaceNotConfigured, - InterfaceNotFound, - InvalidInterfaceImplementation, - ConflictingInterfaceScheme, -) -from kestrel.interface.base import MODULE_PREFIX, AbstractInterface -from kestrel.config.internal import CACHE_INTERFACE_IDENTIFIER - - -_logger = logging.getLogger(__name__) - - -# basically a scheme to interface mapping -@typechecked -class InterfaceManager(Mapping): - def __init__(self, init_interfaces: Iterable[AbstractInterface] = []): - interface_classes = _load_interface_classes() - self.interfaces = list(init_interfaces) # copy/recreate the list - for iface_cls in interface_classes: - try: - iface = iface_cls() - _logger.debug(f"Initialize interface {iface_cls.__name__}") - self.interfaces.append(iface) - except InterfaceNotConfigured as e: - _logger.debug(f"Interface {iface_cls.__name__} not configured; ignored") - - def __getitem__(self, scheme: str) -> AbstractInterface: - for interface in self.interfaces: - if scheme in interface.schemes(): - return interface - else: - raise InterfaceNotFound(f"no interface loaded for scheme {scheme}") - - def __iter__(self) -> Iterable[str]: - return itertools.chain(*[i.schemes() for i in self.interfaces]) - - def __len__(self) -> int: - return sum(1 for _ in iter(self)) - - def copy_with_virtual_cache(self) -> InterfaceManager: - im = copy(self) - # shallow copy refers to the same list, so create/copy a new one - im.interfaces = copy(im.interfaces) - # now swap in virtual cache - cache = im[CACHE_INTERFACE_IDENTIFIER] - im.interfaces.remove(cache) - im.interfaces.append(cache.get_virtual_copy()) - return im - - def del_cache(self): - cache = self[CACHE_INTERFACE_IDENTIFIER] - self.interfaces.remove(cache) - del cache - - -def _load_interface_classes(): - interface_clss = [] - for itf_pkg_name in _list_interface_pkg_names(): - mod = importlib.import_module(itf_pkg_name) - _logger.debug(f"Imported {mod} from package {itf_pkg_name}") - 
cls = inspect.getmembers( - sys.modules[itf_pkg_name], _is_class(AbstractInterface) - ) - if not cls: - raise InvalidInterfaceImplementation( - f'no interface class found in package "{itf_pkg_name}"' - ) - elif len(cls) > 1: - raise InvalidInterfaceImplementation( - f'more than one interface class found in package "{itf_pkg_name}"' - ) - else: - interface_cls = cls[0][1] - _guard_scheme_conflict(interface_cls, interface_clss) - interface_clss.append(interface_cls) - return interface_clss - - -def _list_interface_pkg_names(): - pkg_names = [x.name for x in pkgutil.iter_modules()] - itf_names = [pkg for pkg in pkg_names if pkg.startswith(MODULE_PREFIX)] - return itf_names - - -def _is_class(cls): - return lambda obj: inspect.isclass(obj) and obj.__bases__[0] == cls - - -@typechecked -def _guard_scheme_conflict( - new_interface: Type[AbstractInterface], - interfaces: Iterable[Type[AbstractInterface]], -): - for interface in interfaces: - for scheme_new in new_interface.schemes(): - for scheme_old in interface.schemes(): - if scheme_new == scheme_old: - raise ConflictingInterfaceScheme( - f"scheme: {scheme_new} conflicting between {new_interface.__name__} and {interface.__name__}" - ) diff --git a/packages-nextgen/kestrel_core/src/kestrel/interface/translation/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/interface/translation/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/src/kestrel/interface/translation/query/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/interface/translation/query/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/src/kestrel/interface/translation/result/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/interface/translation/result/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/src/kestrel/ir/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/ir/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/src/kestrel/ir/filter.py b/packages-nextgen/kestrel_core/src/kestrel/ir/filter.py deleted file mode 100644 index ebdd6856..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/ir/filter.py +++ /dev/null @@ -1,196 +0,0 @@ -from __future__ import annotations - -from typeguard import typechecked -from dataclasses import dataclass -from datetime import datetime -from enum import Enum -from typing import List, Optional, Union, Iterable, Any, Callable - -from mashumaro.mixins.json import DataClassJSONMixin - - -class NumCompOp(str, Enum): - """Numerical comparison operators (for int and float)""" - - EQ = "=" - NEQ = "!=" - LT = "<" - LE = "<=" - GT = ">" - GE = ">=" - - -@dataclass -class IntComparison(DataClassJSONMixin): - """Integer comparison expression""" - - field: str - op: NumCompOp - value: int - - -@dataclass -class FloatComparison(DataClassJSONMixin): - """Floating point comparison expression""" - - field: str - op: NumCompOp - value: float - - -class StrCompOp(str, Enum): - """String comparison operators""" - - EQ = "=" - NEQ = "!=" - LIKE = "LIKE" - NLIKE = "NOT LIKE" - MATCHES = "MATCHES" - NMATCHES = "NOT MATCHES" - - -@dataclass -class StrComparison(DataClassJSONMixin): - """String comparison expression""" - - field: str - op: StrCompOp - value: str - - -class ListOp(str, Enum): - """List membership operator""" - - IN = "IN" - NIN = "NOT IN" - - -@dataclass -class ListStrComparison(DataClassJSONMixin): - """List of 
strings membership comparison expression""" - - field: str - op: ListOp - value: List[str] - - -@dataclass -class ListIntComparison(DataClassJSONMixin): - """List of ints membership comparison expression""" - - field: str - op: ListOp - value: List[int] - - -@dataclass -class ListComparison(DataClassJSONMixin): - """List membership comparison expression""" - - field: str - op: ListOp - value: Union[List[int], List[str]] - - -# frozen=True for generating __hash__() method -@dataclass(frozen=True) -class ReferenceValue(DataClassJSONMixin): - """Value for reference""" - - reference: str - attribute: Optional[str] - - -@dataclass -class RefComparison(DataClassJSONMixin): - """Referred variable comparison""" - - field: str - op: ListOp - value: ReferenceValue - - -class ExpOp(str, Enum): - """Boolean expression operator""" - - AND = "AND" - OR = "OR" - - -@dataclass -class MultiComp(DataClassJSONMixin): - """Boolean expression of multiple comparisons. - - The single operator applies to ALL comparisons, so `OR` acts like `any` and `AND` acts like `all`. - """ - - op: ExpOp - comps: List[ - Union[ - IntComparison, FloatComparison, StrComparison, ListComparison, RefComparison - ] - ] - - -@dataclass -class BoolExp(DataClassJSONMixin): - """Binary boolean expression of comparisons""" - - lhs: FExpression - op: ExpOp - rhs: FExpression - - -@dataclass -class TimeRange(DataClassJSONMixin): - """The time range of interest""" - - start: Optional[datetime] = None - stop: Optional[datetime] = None - - -FExpression = Union[ - IntComparison, - FloatComparison, - StrComparison, - ListComparison, - RefComparison, - MultiComp, - BoolExp, -] - - -FComparison = Union[ - IntComparison, - FloatComparison, - StrComparison, - ListComparison, - RefComparison, - MultiComp, -] - - -@typechecked -def get_references_from_exp(exp: FExpression) -> Iterable[ReferenceValue]: - if isinstance(exp, RefComparison): - yield exp.value - elif isinstance(exp, BoolExp): - yield from get_references_from_exp(exp.lhs) - yield from get_references_from_exp(exp.rhs) - elif isinstance(exp, MultiComp): - for comp in exp.comps: - yield from get_references_from_exp(comp) - - -@typechecked -def resolve_reference_with_function( - exp: FExpression, f: Callable[[ReferenceValue], Any] -): - if isinstance(exp, RefComparison): - exp.value = f(exp.value) - elif isinstance(exp, BoolExp): - resolve_reference_with_function(exp.lhs, f) - resolve_reference_with_function(exp.rhs, f) - elif isinstance(exp, MultiComp): - for comp in exp.comps: - resolve_reference_with_function(comp, f) diff --git a/packages-nextgen/kestrel_core/src/kestrel/ir/graph.py b/packages-nextgen/kestrel_core/src/kestrel/ir/graph.py deleted file mode 100644 index ddc41b7d..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/ir/graph.py +++ /dev/null @@ -1,832 +0,0 @@ -from __future__ import annotations -from typeguard import typechecked -from typing import Any, Iterable, Tuple, Mapping, MutableMapping, Union, Optional -from collections import defaultdict -from itertools import combinations -from uuid import UUID -import networkx -import json -from kestrel.ir.instructions import ( - Instruction, - TransformingInstruction, - SolePredecessorTransformingInstruction, - IntermediateInstruction, - SourceInstruction, - Variable, - DataSource, - Reference, - Return, - Filter, - ProjectAttrs, - instruction_from_dict, -) -from kestrel.ir.filter import ReferenceValue -from kestrel.exceptions import ( - InstructionNotFound, - InvalidSeralizedGraph, - VariableNotFound, - ReferenceNotFound, - 
DataSourceNotFound, - DuplicatedVariable, - DuplicatedReference, - DuplicatedDataSource, - DuplicatedSingletonInstruction, - MultiInterfacesInGraph, - MultiSourcesInGraph, - InevaluableInstruction, - LargerThanOneIndegreeInstruction, - DuplicatedReferenceInFilter, - MissingReferenceInFilter, - DanglingReferenceInFilter, - DanglingFilter, -) -from kestrel.config.internal import CACHE_INTERFACE_IDENTIFIER - - -@typechecked -def compose(g: IRGraph, h: IRGraph) -> IRGraph: - g.update(h) - return g - - -@typechecked -def union(g: IRGraph, h: IRGraph) -> IRGraph: - return compose(g, h) - - -@typechecked -class IRGraph(networkx.DiGraph): - def __init__( - self, serialized_graph: Union[None, str, Mapping[str, Iterable[Mapping]]] = None - ): - super().__init__() - if serialized_graph: - if isinstance(serialized_graph, str): - graph_in_dict = json.loads(serialized_graph) - else: - graph_in_dict = serialized_graph - self._from_dict(graph_in_dict) - - def add_node( - self, - node: Instruction, - dependent_node: Optional[Instruction] = None, - deref: bool = True, - ) -> Instruction: - """General adding node/instruction operation - - Parameters: - node: the instruction to add - dependent_node: the dependent instruction if node is a TransformingInstruction - deref: whether to dereference Reference instruction (only useful for if node is Reference) - - Returns: - The node added - """ - if node not in self: - if isinstance(node, TransformingInstruction): - node = self._add_node_with_dependent_node(node, dependent_node) - else: - node = self._add_node(node, deref) - return node - - def add_nodes_from(self, nodes: Iterable[Instruction], deref: bool = True): - """Add nodes in a list - - Parameters: - nodes: the list of nodes/instructions to add - deref: whether to deref Reference node - """ - for node in nodes: - self._add_node(node, deref) - - def add_edge(self, u: Instruction, v: Instruction, deref: bool = False): - """Add edge (add node if not exist) - - Parameters: - u: the source of the edge - v: the target of the edge - deref: whether to deref Reference node - """ - ux = self._add_node(u, deref) - vx = self._add_node(v, deref) - super().add_edge(ux, vx) - - def add_edges_from( - self, edges: Iterable[Tuple[Instruction, Instruction]], deref: bool = False - ): - """Add edges in a list - - Parameters: - edges: the edges to add - deref: whether to deref Reference node - """ - for u, v in edges: - self.add_edge(u, v, deref) - - def copy(self): - """Copy the IRGraph with all nodes as reference (not deepcopy) - - Support subclass of IRGraph to be copied. - """ - g = IRGraph() - g.update(self) - - # subclass support - if type(g) != type(self): - g = type(self)(g) - - return g - - def deepcopy(self): - """Copy the IRGraph with all nodes copied as new objects - - Support subclass of IRGraph to be deep copied. 
- """ - g = IRGraph() - o2n = {n: n.deepcopy() for n in self.nodes()} - for u, v in self.edges(): - g.add_edge(o2n[u], o2n[v]) - g.add_nodes_from([o2n[n] for n in self.nodes() if self.degree(n) == 0]) - - # subclass support - if type(g) != type(self): - g = type(self)(g) - - return g - - def get_node_by_id(self, ux: Union[UUID, str]) -> Instruction: - """Get node by ID - - Parameters: - ux: node ID - - Returns: - The Kestrel instruction (node in IRGraph) - """ - u = UUID(ux) if isinstance(ux, str) else ux - try: - return next(filter(lambda n: n.id == u, self.nodes())) - except StopIteration: - raise InstructionNotFound(u) - - def get_nodes_by_type(self, ntype: type) -> Iterable[Instruction]: - """Get nodes by type - - Parameters: - ntype: node/instruction type - - Returns: - The list of nodes/instructions - """ - return [n for n in self.nodes() if isinstance(n, ntype)] - - def get_nodes_by_type_and_attributes( - self, ntype: type, attr2val: Mapping[str, Union[str, bool, int]] - ) -> Iterable[Instruction]: - """Get nodes by both type and attributes/values - - Parameters: - ntype: node/instruction type - attr2val: instruction attribute/value dictionary - - Returns: - The list of nodes/instructions - """ - nodes = self.get_nodes_by_type(ntype) - return [ - n - for n in nodes - if all([getattr(n, k, None) == v for (k, v) in attr2val.items()]) - ] - - def get_variable(self, var_name: str) -> Variable: - """Get a Kestrel variable by its name - - Parameters: - var_name: variable name - - Returns: - The Kestrel variable given its name - """ - xs = self.get_nodes_by_type_and_attributes(Variable, {"name": var_name}) - if xs: - if len({x.version for x in xs}) < len(xs): - raise DuplicatedVariable(var_name) - else: - xs.sort(key=lambda x: x.version) - return xs[-1] - else: - raise VariableNotFound(var_name) - - def get_variables(self) -> Iterable[Variable]: - """Get all variables - - This method returns a list of variables, equivalent to *Symbol Table* used in traditional (non-graph-IR) language compilers. Shadowed variables (replaced by new variables with same names) will not be returned. - - Returns: - The list of all Kestrel variables in this huntflow. 
- """ - var_names = {v.name for v in self.get_nodes_by_type(Variable)} - return [self.get_variable(var_name) for var_name in var_names] - - def add_variable( - self, vx: Union[str, Variable], dependent_node: Instruction - ) -> Variable: - """Create new variable (if needed) and add to IRGraph - - Parameters: - vx: variable name (str) or already created node (Variable) - dependent_node: the instruction to which the variable refer - - Returns: - The variable node created/added - """ - v = Variable(vx) if isinstance(vx, str) else vx - return self.add_node(v, dependent_node) - - def get_reference(self, ref_name: str) -> Reference: - """Get a Kestrel reference by its name - - Parameters: - ref_name: reference name - - Returns: - The Reference node - """ - xs = self.get_nodes_by_type_and_attributes(Reference, {"name": ref_name}) - if xs: - if len(xs) > 1: - raise DuplicatedReference(ref_name) - else: - return xs.pop() - else: - raise ReferenceNotFound(ref_name) - - def get_references(self) -> Iterable[Reference]: - """Get all references - - Returns: - The list of reference nodes - """ - ref_names = {r.name for r in self.get_nodes_by_type(Reference)} - return [self.get_reference(ref_name) for ref_name in ref_names] - - def add_reference( - self, rx: Union[str, Reference], deref: bool = True - ) -> Union[Reference, Variable]: - """Create or add new reference node to IRGraph - - The reference node will be derefed if the flag is specified. - - Parameters: - rx: reference name (str) or already created node (Reference) - deref: whether to deref when adding node - - Returns: - The reference node created/added - """ - r = Reference(rx) if isinstance(rx, str) else rx - return self.add_node(r, deref) - - def get_datasource(self, interface: str, datasource: str) -> DataSource: - """Get a Kestrel datasource by its URI - - Parameters: - interface: the datasource interface name - datasource: the datasource name under the interface - - Returns: - The datasource - """ - xs = self.get_nodes_by_type_and_attributes( - DataSource, {"interface": interface, "datasource": datasource} - ) - if xs: - if len(xs) > 1: - raise DuplicatedDataSource(interface, datasource) - else: - return xs.pop() - else: - raise DataSourceNotFound(interface, datasource) - - def get_datasources(self) -> Iterable[DataSource]: - """Get all datasources - - Returns: - The list of data sources - """ - xs = self.get_nodes_by_type(DataSource) - - # to check for duplicated datasources - - return xs - - def add_datasource( - self, sx: Union[str, DataSource], default_interface: Optional[str] = None - ) -> DataSource: - """Create new datasource (if needed) and add to IRGraph if not exist - - Parameters: - sx: the full URI of the datasource (str) or already created node (DataSource) - default_interface: default interface name - - Returns: - The DataSource node found or added - """ - s = DataSource(sx, default_interface) if isinstance(sx, str) else sx - return self.add_node(s) - - def get_returns(self) -> Iterable[Return]: - """Get all return nodes - - Returns: - The list of return nodes - """ - return sorted(self.get_nodes_by_type(Return), key=lambda x: x.sequence) - - def get_max_return_sequence(self) -> int: - """Get the largest sequence number of all Returns - - Returns: - The largest sequence number of all Return instruction - """ - return max(map(lambda x: x.sequence, self.get_returns()), default=-1) - - def add_return(self, dependent_node: Instruction) -> Return: - """Create new Return instruction and add to IRGraph - - Parameters: - 
dependent_node: the instruction the new Return depends on
-
-        Returns:
-
-            The return node created/added
-        """
-        return self.add_node(Return(), dependent_node)
-
-    def get_sink_nodes(self) -> Iterable[Instruction]:
-        """Get all sink nodes (nodes with no successors)
-
-        Returns:
-
-            The list of sink nodes
-        """
-        return [n for n in self.nodes() if self.out_degree(n) == 0]
-
-    def get_trunk_n_branches(
-        self, node: TransformingInstruction
-    ) -> Tuple[Instruction, Mapping[ReferenceValue, Instruction]]:
-        """Get the trunk and branch paths for an instruction
-
-        For the trunk path, return the tail node; for each branch, return the
-        tail node of the branch in a mapping from reference to node.
-
-        Parameters:
-
-            node: the instruction node
-
-        Returns:
-
-            (tail node for trunk, ref to branch tail node mapping)
-        """
-        ps = list(self.predecessors(node))
-        pps = [(p, pp) for p in self.predecessors(node) for pp in self.predecessors(p)]
-
-        # may need to add a patch in find_dependent_subgraphs_of_node()
-        # for each new case added in the if/elif, e.g., Filter
-        if isinstance(node, SolePredecessorTransformingInstruction):
-            if len(ps) > 1:
-                raise LargerThanOneIndegreeInstruction()
-            else:
-                return ps[0], {}
-        elif isinstance(node, Filter):
-            r2n = {}
-            for rv in node.get_references():
-                ppfs = [
-                    (p, pp)
-                    for p, pp in pps
-                    if isinstance(p, ProjectAttrs)
-                    and isinstance(pp, (Variable, Reference))
-                    and p.attrs == [rv.attribute]
-                    and pp.name == rv.reference
-                ]
-                if not ppfs:
-                    raise MissingReferenceInFilter(rv, node, pps)
-                elif len(ppfs) > 1:
-                    raise DuplicatedReferenceInFilter(rv, node, pps)
-                else:
-                    p = ppfs[0][0]
-                    r2n[rv] = p
-                    ps.remove(p)
-            if len(ps) == 0:
-                raise DanglingFilter()
-            elif len(ps) > 1:
-                raise DanglingReferenceInFilter(ps)
-            return ps[0], r2n
-        else:
-            raise NotImplementedError(f"unknown instruction type: {node}")
-
-    def update(self, ng: IRGraph):
-        """Extend the current IRGraph with a new IRGraph
-
-        Parameters:
-
-            ng: the new IRGraph to merge/combine/union
-        """
-        # After we add new variable nodes, we can no longer rely on
-        # self.get_variable() to get variables for de-referencing.
-        # Save the original variables first.
-        original_variables = {v.name: v for v in self.get_variables()}
-
-        # prepare new variables from ng before merge;
-        # should not use ng.get_variable(),
-        # which does not cover all overridden variables
-        for nv in ng.get_nodes_by_type(Variable):
-            if nv.name in original_variables:
-                nv.version += original_variables[nv.name].version + 1
-
-        # prepare return sequence from ng before merge
-        return_max_sequence = self.get_max_return_sequence()
-        for nr in ng.get_returns():
-            nr.sequence += return_max_sequence + 1
-
-        # add refs first to deref correctly;
-        # if any reference exists, it should be derefed before adding any variable
-        o2n_refs = {n: self._add_node(n) for n in ng.get_references()}
-        # add all nodes, deduplicating singleton nodes, e.g., SourceInstruction
-        o2n_nonrefs = {n: self._add_node(n) for n in ng.nodes() if n not in o2n_refs}
-
-        # overall old to new node mapping
-        o2n = {}
-        o2n.update(o2n_refs)
-        o2n.update(o2n_nonrefs)
-
-        # add all edges
-        self.add_edges_from([(o2n[u], o2n[v]) for (u, v) in ng.edges()])
-
-    def duplicate_dependent_subgraph_of_node(self, node: Instruction) -> IRGraph:
-        """Find and copy the dependent subgraph of a node (including the node)
-
-        Parameters:
-
-            node: instruction node to start from
-
-        Returns:
-
-            A copy of the dependent subgraph (including the input node)
-        """
-        nodes = networkx.ancestors(self, node)
-        nodes.add(node)
-        return self.subgraph(nodes).copy()
-
-    def find_cached_dependent_subgraph_of_node(
-        self, node: Instruction, cache: MutableMapping[UUID, Any]
-    ) -> IRGraph:
-        """Return the cached dependent graph of a node
-
-        Discard nodes and subgraphs before any cached nodes, e.g., Variables.
-
-        Parameters:
-
-            node: instruction node to start from
-
-            cache: any type of node cache, e.g., content, SQL statement
-
-        Returns:
-
-            The pruned IRGraph without nodes before cached Variable nodes
-        """
-        g = self.duplicate_dependent_subgraph_of_node(node)
-        in_edges = [g.in_edges(n) for n in g.nodes() if n.id in cache]
-        g.remove_edges_from(set().union(*in_edges))
-
-        # important last step to discard any unconnected nodes/subgraphs prior to the dropped edges
-        return g.duplicate_dependent_subgraph_of_node(node)
-
-    def find_dependent_subgraphs_of_node(
-        self,
-        node: Instruction,
-        cache: MutableMapping[UUID, Any],
-    ) -> Iterable[IRGraphEvaluable]:
-        """Find dependency subgraphs that do not have further dependencies
-
-        To evaluate a node, one needs to evaluate all nodes in its dependent
-        graph. However, not all nodes can be evaluated at once (e.g., when
-        impacted by multiple interfaces). Some require more basic dependent
-        subgraphs to be evaluated first. This method segments the dependent
-        graph of a node and returns the subgraphs that are IRGraphEvaluable.
-        One can evaluate the returned subgraphs, cache the results, and call
-        this method again. After iterations of evaluating returned dependent
-        subgraphs, the node can finally be evaluated in the last round, where
-        its dependent graph is itself an IRGraphEvaluable.
-
-        TODO: analytics node support
-
-        Parameters:
-
-            node: the instruction/node to generate dependent subgraphs for
-
-            cache: any type of node cache, e.g., content, SQL statement
-
-        Returns:
-
-            A list of subgraphs that do not have further dependencies
-        """
-        _CII = CACHE_INTERFACE_IDENTIFIER
-
-        # the base graph to segment
-        g = self.find_cached_dependent_subgraph_of_node(node, cache)
-
-        # Mapping: {interface name: [impacted nodes]}
-        a2ns = defaultdict(set)
-        for n in g.get_nodes_by_type(SourceInstruction):
-            a2ns[n.interface].add(n)
-            a2ns[n.interface].update(networkx.descendants(g, n))
-
-        # all predecessor nodes to any interface impacted nodes
-        pns = set().union(*[set(g.predecessors(n)) for ns in a2ns.values() for n in ns])
-
-        # add non-source nodes to cache as the default execution environment,
-        # e.g., a path starting from a cached Variable;
-        # nodes directly preceding an interface impacted node do not need evaluation
-        cached_nodes = set([n for n in g.nodes() if n.id in cache])
-        for n in cached_nodes - pns:
-            a2ns[_CII].add(n)
-            a2ns[_CII].update(networkx.descendants(g, n))
-
-        # find all nodes that are affected by two or more interfaces
-        shared_impacted_nodes = set().union(
-            *[a2ns[ix] & a2ns[iy] for ix, iy in combinations(a2ns.keys(), 2)]
-        )
-
-        # unshared nodes for each interface
-        a2uns = {k: v - shared_impacted_nodes for k, v in a2ns.items()}
-
-        # handle cached direct predecessor nodes:
-        # such nodes are required in building dep graphs around interfaces;
-        # such nodes could be shared by multiple interfaces, which can only be handled here
-        for interface in set(a2uns) - set([_CII]):
-            ps = set().union(*[set(g.predecessors(n)) for n in a2uns[interface]])
-            a2uns[interface].update(ps & cached_nodes)
-
-        # a patch (corner case handling) for get_trunk_n_branches():
-        # add a Variable/Reference node if succeeded by ProjectAttrs and Filter,
-        # which are in the dependent graph; the Variable is only needed by
-        # get_trunk_n_branches() as an auxiliary node
-        for interface in a2uns:
-            auxs = []
-            for n in a2uns[interface]:
-                if isinstance(n, ProjectAttrs):
-                    # need to search in `self`, not `g`, since the boundary of
-                    # `g` is cut by the cache
-                    p = next(self.predecessors(n))
-                    s = next(g.successors(n))
-                    if (
-                        isinstance(s, Filter)
-                        and isinstance(p, (Variable, Reference))
-                        and s in a2uns[interface]
-                    ):
-                        auxs.append(p)
-            a2uns[interface].update(auxs)
-
-        # remove dep graphs with only one node,
-        # e.g., `ds://a` in "y = GET file FROM ds://a WHERE x = v.x"
-        # when v.x is not in cache
-        dep_nodes = [ns for ns in a2uns.values() if len(ns) > 1]
-        # need to search in `self` due to the patch for get_trunk_n_branches()
-        dep_graphs = [
-            IRGraphEvaluable(self.subgraph(ns)).deepcopy() for ns in dep_nodes
-        ]
-
-        return dep_graphs
-
-    def find_simple_query_subgraphs(
-        self, cache: MutableMapping[UUID, Any]
-    ) -> Iterable[IRGraphSimpleQuery]:
-        """Find dependency subgraphs that are IRGraphSimpleQuery
-
-        Some interfaces, e.g., stix-shifter, build stateless queries and do not
-        support JOIN or subqueries/sub-SELECTs, so they can only evaluate a simple
-        SQL query around each source node. Use this method to prepare such tiny
-        graph segments for evaluation by the interface. The remainder of the
-        graph can be evaluated in cache.
-
-        Parameters:
-
-            cache: any type of node cache, e.g., content, SQL statement
-
-        Returns:
-
-            An iterator of simple-query subgraphs
-        """
-        for n in self.get_nodes_by_type(SourceInstruction):
-            for g in self._find_paths_from_node_to_a_variable(n, cache):
-                yield IRGraphSimpleQuery(g)
-
-    def _find_paths_from_node_to_a_variable(
-        self, node: Instruction, cache: MutableMapping[UUID, Any]
-    ) -> Iterable[IRGraph]:
-        """Find paths (linear IRGraphs with directly attached cached nodes) from
-        the starting node to its closest variables
-
-        If the linear IRGraph has a dependent branch/path longer than a cached
-        node, this linear IRGraph cannot be used to build an IRGraphSimpleQuery;
-        it needs to generate a subquery for the branch.
-
-        Parameters:
-
-            node: the node to start the path search from
-
-            cache: any type of node cache, e.g., content, SQL statement
-
-        Returns:
-
-            An iterator of paths
-        """
-        # check whether the node has other uncached incoming nodes;
-        # if not, this path can be an IRGraphSimpleQuery
-        if len([n for n in self.predecessors(node) if n.id not in cache]) <= 1:
-            # pcns: predecessor cached nodes
-            pcns = [n for n in self.predecessors(node) if n.id in cache]
-            for succ in self.successors(node):
-                if isinstance(succ, Variable):
-                    yield self.subgraph([succ, node] + pcns)
-                else:
-                    for succ_graph in self._find_paths_from_node_to_a_variable(
-                        succ, cache
-                    ):
-                        yield self.subgraph(list(succ_graph.nodes()) + [node] + pcns)
-
-    def to_dict(self) -> Mapping[str, Iterable[Mapping]]:
-        """Serialize to a Python dictionary (D3 graph format)
-
-        Returns:
-
-            The graph in a Python dictionary to be dumped as a JSON string
-        """
-        nodes = [n.to_dict() for n in self.nodes()]
-        links = [{"source": str(u.id), "target": str(v.id)} for (u, v) in self.edges()]
-        return {"nodes": nodes, "links": links}
-
-    def to_json(self) -> str:
-        """Serialize to a JSON string (D3 graph format)
-
-        Returns:
-
-            The graph in a JSON string
-        """
-        return json.dumps(self.to_dict())
-
-    def _add_node(self, node: Instruction, deref: bool = True) -> Instruction:
-        """Add just the node
-
-        Dependencies (if any) are not handled. Variable version and Return
-        sequence are intentionally not handled here (they are handled in
-        _add_node_with_dependent_node()); this is the plain node-adding
-        operation used by update().
-
-        Parameters:
-
-            node: the node/instruction to add
-
-            deref: whether to deref if the node is a Reference
-
-        Returns:
-
-            The node added, found, or derefed
-        """
-        # testing `node in self` is important:
-        # there could be a Reference node already in the graph; do not deref it
-        if node not in self:
-            if isinstance(node, IntermediateInstruction):
-                if isinstance(node, Reference):
-                    if deref:
-                        try:
-                            v = self.get_variable(node.name)
-                        except VariableNotFound:
-                            # deref failed; add the Reference node directly
-                            node = self._add_singleton_instruction(node)
-                        else:
-                            # deref succeeded; no need to add the node
-                            node = v
-                    else:
-                        node = self._add_singleton_instruction(node)
-                else:
-                    raise NotImplementedError(
-                        f"unknown IntermediateInstruction: {node}"
-                    )
-            elif isinstance(node, SourceInstruction):
-                node = self._add_singleton_instruction(node)
-            else:
-                super().add_node(node)
-        return node
-
-    def _add_singleton_instruction(self, node: Instruction) -> Instruction:
-        """Guard adding a singleton node
-
-        1. Singleton nodes are nodes that have only one copy in the graph
-
-        2.
A node that has no predecessors is a singleton node - - Parameters: - node: the node/instruction to add - - Returns: - The node added or found - """ - xs = [ - x - for x in self.nodes() - if x.has_same_content_as(node) and self.in_degree(x) == 0 - ] - if xs: - if len(xs) > 1: - raise DuplicatedSingletonInstruction(node) - else: - node = xs.pop() - else: - super().add_node(node) - return node - - def _add_node_with_dependent_node( - self, node: Instruction, dependent_node: Instruction - ) -> Instruction: - """Add node to graph with a dependent node - - Variable version and Return sequence are handled here. - - Parameters: - node: the node/instruction to add - dependent_node: the dependent node that should exist in the graph - - Return: - The node added - """ - if dependent_node not in self: - raise InstructionNotFound(dependent_node) - if node not in self: - if isinstance(node, Variable): - try: - ve = self.get_variable(node.name) - except VariableNotFound: - node.version = 0 - else: - node.version = ve.version + 1 - if isinstance(node, Return): - node.sequence = self.get_max_return_sequence() + 1 - # add_edge will add node first - self.add_edge(dependent_node, node) - return node - - def _from_dict(self, graph_in_dict: Mapping[str, Iterable[Mapping]]): - """Deserialize from a Python dictionary (D3 graph format) - - This method is an implicit constructor from a serialized graph. - - Parameters: - graph_in_dict: the serialized graph in Python dictionary - """ - nodes = graph_in_dict["nodes"] - edges = graph_in_dict["links"] - for n in nodes: - self._add_node(instruction_from_dict(n), False) - for e in edges: - try: - u = self.get_node_by_id(e["source"]) - v = self.get_node_by_id(e["target"]) - except InstructionNotFound as err: - raise InvalidSeralizedGraph() - else: - self.add_edge(u, v) - - -@typechecked -class IRGraphEvaluable(IRGraph): - """Evaluable IRGraph - - An evaluable IRGraph is an IRGraph that - - 1. Only has one interface - - 2. No IntermediateInstruction node - """ - - def __init__(self, graph: Optional[IRGraph] = None): - super().__init__() - - # need to initialize it before `self.update(graph)` below - self.interface = None - - # update() will call _add_node() internally to set self.interface - if graph: - self.update(graph) - - # all source nodes are already cached (no SourceInstruction) - if not self.interface: - self.interface = CACHE_INTERFACE_IDENTIFIER - - def _add_node(self, node: Instruction, deref: bool = True) -> Instruction: - if isinstance(node, IntermediateInstruction): - raise InevaluableInstruction(node) - elif isinstance(node, SourceInstruction): - if self.interface: - if node.interface != self.interface: - raise MultiInterfacesInGraph([self.interface, node.interface]) - else: - self.interface = node.interface - return super()._add_node(node, deref) - - -@typechecked -class IRGraphSimpleQuery(IRGraphEvaluable): - """Simple Query IRGraph - - A simple query IRGraph is an evaluable IRGraph that - - 1. It contains one source node - - 2. 
It can be compiled into a simple (not nested/joined) SQL query - """ - - def __init__(self, graph: Optional[IRGraph] = None): - if graph and len(graph.get_nodes_by_type(SourceInstruction)) > 1: - raise MultiSourcesInGraph() - super().__init__(graph) diff --git a/packages-nextgen/kestrel_core/src/kestrel/ir/instructions.py b/packages-nextgen/kestrel_core/src/kestrel/ir/instructions.py deleted file mode 100644 index 8b1aa1e3..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/ir/instructions.py +++ /dev/null @@ -1,228 +0,0 @@ -from __future__ import annotations - -import copy -import inspect -import json -import sys -import uuid -from dataclasses import InitVar, dataclass, field, fields -from enum import Enum -from typing import Any, Iterable, List, Mapping, Optional, Type, Union - -from kestrel.__future__ import is_python_older_than_minor_version -from kestrel.config.internal import CACHE_INTERFACE_IDENTIFIER -from kestrel.exceptions import ( - InvalidDataSource, - InvalidInstruction, - InvalidSeralizedInstruction, -) -from kestrel.ir.filter import ( - FExpression, - ReferenceValue, - TimeRange, - get_references_from_exp, - resolve_reference_with_function, -) -from mashumaro.mixins.json import DataClassJSONMixin -from typeguard import typechecked - -# https://stackoverflow.com/questions/70400639/how-do-i-get-python-dataclass-initvar-fields-to-work-with-typing-get-type-hints -if is_python_older_than_minor_version(11): - InitVar.__call__ = lambda *args: None - - -@dataclass -class Instruction(DataClassJSONMixin): - id: uuid.UUID = field(init=False) - instruction: str = field(init=False) - - def __post_init__(self): - # stable id during Instruction lifetime - self.id = uuid.uuid4() - self.instruction = self.__class__.__name__ - - def __eq__(self, other: Instruction): - return self.id == other.id - - def __hash__(self): - # stable hash during Instruction lifetime - return self.id.int - - def copy(self): - return copy.copy(self) - - def deepcopy(self): - return copy.deepcopy(self) - - def has_same_content_as(self, instruction: Instruction) -> bool: - if self.instruction == instruction.instruction: - flag = True - for f in fields(self): - if f.name != "id" and getattr(self, f.name) != getattr( - instruction, f.name - ): - flag = False - else: - flag = False - return flag - - -class TransformingInstruction(Instruction): - """The instruction that builds/dependent on one or more instructions""" - - pass - - -class SolePredecessorTransformingInstruction(TransformingInstruction): - """The translating instruction whose indegree==1""" - - pass - - -class SourceInstruction(Instruction): - """The instruction that does not dependent on any instruction""" - - interface: str - - -class IntermediateInstruction(Instruction): - """The instruction that aids AST to Kestrel IR compilation""" - - pass - - -@dataclass(eq=False) -class Return(SolePredecessorTransformingInstruction): - """The sink instruction that forces execution - - Return is implemented as a TransformingInstruction so it triggers - IRGraph._add_node_with_dependent_node() in IRGraph.add_node() - """ - - # the order/sequence of return instruction in huntflow (source code) - sequence: int = 0 - - -@dataclass(eq=False) -class Filter(TransformingInstruction): - exp: FExpression - timerange: TimeRange = field(default_factory=TimeRange) - - # TODO: from_json() for self.exp - - def get_references(self) -> Iterable[ReferenceValue]: - return get_references_from_exp(self.exp) - - def resolve_references(self, f: Callable[[ReferenceValue], Any]): - 
resolve_reference_with_function(self.exp, f)
-
-
-@dataclass(eq=False)
-class ProjectEntity(SolePredecessorTransformingInstruction):
-    entity_type: str
-
-
-@dataclass(eq=False)
-class ProjectAttrs(SolePredecessorTransformingInstruction):
-    # mashumaro does not support typing.Iterable, only List
-    attrs: List[str]
-
-
-@dataclass(eq=False)
-class DataSource(SourceInstruction):
-    uri: InitVar[Optional[str]] = None
-    default_interface: InitVar[Optional[str]] = None
-    interface: str = ""
-    datasource: str = ""
-
-    def __post_init__(self, uri: Optional[str], default_interface: Optional[str]):
-        super().__post_init__()
-        if uri:
-            # normal constructor, not from deserialization
-            xs = uri.split("://")
-            if len(xs) == 2:
-                self.interface = xs[0]
-                self.datasource = xs[1]
-            elif len(xs) == 1 and default_interface:
-                self.interface = default_interface
-                self.datasource = xs[0]
-            else:
-                raise InvalidDataSource(uri)
-        else:
-            # from deserialization; mashumaro will take care of it
-            pass
-
-
-@dataclass(eq=False)
-class Variable(SolePredecessorTransformingInstruction):
-    name: str
-    # required to dereference a variable that has been created multiple times;
-    # the variable with the largest version will be used by dereference
-    version: int = 0
-
-
-@dataclass(eq=False)
-class Reference(IntermediateInstruction):
-    """Referred Kestrel variable (used in AST) before de-referencing to a Kestrel variable"""
-
-    name: str
-
-
-@dataclass(eq=False)
-class Explain(SolePredecessorTransformingInstruction):
-    pass
-
-
-@dataclass(eq=False)
-class Limit(SolePredecessorTransformingInstruction):
-    num: int
-
-
-@dataclass(eq=False)
-class Offset(SolePredecessorTransformingInstruction):
-    num: int
-
-
-@dataclass(eq=False)
-class Construct(SourceInstruction):
-    data: List[Mapping[str, Union[str, int, bool]]]
-    interface: str = CACHE_INTERFACE_IDENTIFIER
-
-
-class SortDirection(str, Enum):
-    ASC = "ASC"
-    DESC = "DESC"
-
-
-@dataclass(eq=False)
-class Sort(SolePredecessorTransformingInstruction):
-    attribute: str
-    direction: SortDirection = SortDirection.DESC
-
-
-@typechecked
-def get_instruction_class(name: str) -> Type[Instruction]:
-    classes = inspect.getmembers(sys.modules[__name__], inspect.isclass)
-    instructions = [cls for _, cls in classes if issubclass(cls, Instruction)]
-    try:
-        return next(filter(lambda cls: cls.__name__ == name, instructions))
-    except StopIteration:
-        raise InvalidInstruction(name)
-
-
-@typechecked
-def instruction_from_dict(d: Mapping[str, Union[str, bool, int]]) -> Instruction:
-    instruction_class = get_instruction_class(d["instruction"])
-    try:
-        instruction = instruction_class.from_dict(d)
-        instruction.id = uuid.UUID(d["id"])
-    except Exception:
-        raise InvalidSeralizedInstruction(d)
-    else:
-        return instruction
-
-
-@typechecked
-def instruction_from_json(json_str: str) -> Instruction:
-    instruction_in_dict = json.loads(json_str)
-    return instruction_from_dict(instruction_in_dict)
diff --git a/packages-nextgen/kestrel_core/src/kestrel/ir/relation/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/ir/relation/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/packages-nextgen/kestrel_core/src/kestrel/ir/relation/relation.yaml b/packages-nextgen/kestrel_core/src/kestrel/ir/relation/relation.yaml
deleted file mode 100644
index e69de29b..00000000
diff --git a/packages-nextgen/kestrel_core/src/kestrel/ir/relation/utils.py b/packages-nextgen/kestrel_core/src/kestrel/ir/relation/utils.py
deleted file mode 100644
index e69de29b..00000000
diff --git
a/packages-nextgen/kestrel_core/src/kestrel/mapping/__init__.py b/packages-nextgen/kestrel_core/src/kestrel/mapping/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/src/kestrel/mapping/data_model.py b/packages-nextgen/kestrel_core/src/kestrel/mapping/data_model.py deleted file mode 100644 index d05bd943..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/mapping/data_model.py +++ /dev/null @@ -1,279 +0,0 @@ -import logging -from typing import Optional, Union - -import dpath -import numpy as np -import yaml -from pandas import DataFrame -from typeguard import typechecked - -from kestrel.mapping.transformers import ( - run_transformer, - run_transformer_on_series, -) -from kestrel.utils import list_folder_files - -_logger = logging.getLogger(__name__) - - -def _add_mapping(obj: dict, key: str, mapping: dict): - """Add `key` -> `mapping` to `obj`, appending if necessary""" - existing_mapping = obj.get(key) - if existing_mapping: - if isinstance(existing_mapping, str): - existing_mapping = [{"ocsf_field": existing_mapping}] - elif isinstance(existing_mapping, dict): - existing_mapping = [existing_mapping] - else: - existing_mapping = [] - existing_mapping.append(mapping) - obj[key] = existing_mapping - - -def _reverse_dict(obj: dict, k: str, v: dict): - """Reverse a single OCSF -> native mapping and add it to `obj`""" - key = v["native_field"] - mapping = {i: j for i, j in v.items() if i != "native_field"} - mapping["ocsf_field"] = k - _add_mapping(obj, key, mapping) - - -def _add_attr(obj: dict, key: str, value: str): - """Add `key` -> `value` to `obj`, appending if necessary""" - if key not in obj: - obj[key] = value - else: - existing = obj[key] - if isinstance(existing, str): - obj[key] = [existing, value] - else: - existing.append(value) - - -def reverse_mapping(obj: dict, prefix: str = None, result: dict = None) -> dict: - """Reverse the mapping; return native -> OCSF map""" - if result is None: - result = {} - for k, v in obj.items(): - k = ".".join((prefix, k)) if prefix else k - # Recurse if necessary - if isinstance(v, str): - _add_attr(result, v, k) - elif isinstance(v, list): - # Need to handle multiple mappings - for i in v: - if isinstance(i, str): - _add_attr(result, i, k) - elif "native_field" in i: - _reverse_dict(result, k, i) - else: - # Need to "deep" merge with current results - reverse_mapping(i, k, result) - elif isinstance(v, dict): - # First determine if this is a complex mapping or just another level - if "native_field" in v: - _reverse_dict(result, k, v) - else: - # Need to "deep" merge with current results - reverse_mapping(v, k, result) - - return result - - -def _get_map_triple(d: dict, prefix: str, op: str, value) -> tuple: - mapped_op = d.get(f"{prefix}_op") - transform = d.get(f"{prefix}_value") - new_value = run_transformer(transform, value) - new_op = mapped_op if mapped_op else op - return (d[f"{prefix}_field"], new_op, new_value) - - -def translate_comparison_to_native( - dmm: dict, field: str, op: str, value: Union[str, int, float] -) -> list: - """Translate the (`field`, `op`, `value`) triple using data model map `dmm` - - This function may be used in datasource interfaces to translate a comparison - in the OCSF data model to the native data model, according to the data model - mapping in `dmm`. - - This function translates the (`field`, `op`, `value`) triple into a list of - translated triples based on the provided data model map. 
The data model map - is a dictionary that maps fields from one data model to another. For - example, if you have a field named "user.name" in your data model, but the - corresponding field in the native data model is "username", then you can use - the data model map to translate the field name. - - Parameters: - dmm: A dictionary that maps fields from one data model to another. - field: The field name to be translated. - op: The comparison operator. - value: The value to be compared against. - - Returns: - A list of translated triples. - - Raises: - KeyError: If the field cannot be found in the data model map. - """ - _logger.debug("comp_to_native: %s %s %s", field, op, value) - result = [] - mapping = dmm.get(field) - if mapping: - if isinstance(mapping, str): - # Simple 1:1 field name mapping - result.append((mapping, op, value)) - else: - raise NotImplementedError("complex native mapping") - else: - parts = field.split(".") - tmp = dmm - for part in parts: - if isinstance(tmp, dict): - tmp = tmp.get(part, {}) - else: - break - if tmp: - if isinstance(tmp, list): - for i in tmp: - if isinstance(i, dict): - result.append(_get_map_triple(i, "native", op, value)) - else: - result.append((i, op, value)) - elif isinstance(tmp, dict): - result.append(_get_map_triple(tmp, "native", op, value)) - elif isinstance(tmp, str): - result.append((tmp, op, value)) - else: - # Pass-through - result.append((field, op, value)) - _logger.debug("comp_to_native: return %s", result) - return result - - -def translate_comparison_to_ocsf( - dmm: dict, field: str, op: str, value: Union[str, int, float] -) -> list: - """Translate the (`field`, `op`, `value`) triple using data model map `dmm` - - This function is used in the frontend to translate a comparison in - the STIX (or, in the future, ECS) data model to the OCSF data - model, according to the data model mapping in `dmm`. - - This function translates the (`field`, `op`, `value`) triple into a list of - translated triples based on the provided data model map. The data model map - is a dictionary that maps fields from one data model to another. For - example, if you have a field named "user.name" in your data model, but the - corresponding field in the native data model is "username", then you can use - the data model map to translate the field name. - - Parameters: - dmm: A dictionary that maps fields from one data model to another. - field: The field name to be translated. - op: The comparison operator. - value: The value to be compared against. - - Returns: - A list of translated triples. - - Raises: - KeyError: If the field cannot be found in the data model map. 
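A concrete call for the simplest mapping form, the 1:1 string branch of the code below (hypothetical one-entry STIX-to-OCSF map; assumes the kestrel_core tree above is installed):

from kestrel.mapping.data_model import translate_comparison_to_ocsf

dmm = {"process:name": "process.name"}  # toy map: STIX field -> OCSF field
translate_comparison_to_ocsf(dmm, "process:name", "=", "cmd.exe")
# returns [('process.name', '=', 'cmd.exe')]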
- - """ - _logger.debug("comp_to_ocsf: %s %s %s", field, op, value) - result = [] - mapping = dmm.get(field) - if isinstance(mapping, str): - # Simple 1:1 field name mapping - result.append((mapping, op, value)) - elif isinstance(mapping, list): - for i in mapping: - if isinstance(i, dict): - result.append(_get_map_triple(i, "ocsf", op, value)) - else: - result.append((i, op, value)) - return result - - -@typechecked -def load_default_mapping( - data_model_name: str, - mapping_pkg: str = "kestrel.mapping", - submodule: str = "entityattribute", -): - result = {} - entityattr_mapping_files = list_folder_files( - mapping_pkg, submodule, prefix=data_model_name, extension="yaml" - ) - for f in entityattr_mapping_files: - with open(f, "r") as fp: - result.update(yaml.safe_load(fp)) - return result - - -@typechecked -def _get_from_mapping(mapping: Union[str, list, dict], key) -> list: - result = [] - if isinstance(mapping, list): - for i in mapping: - if isinstance(i, dict): - result.append(i[key]) - else: - result.append(i) - elif isinstance(mapping, dict): - result.append(mapping[key]) - elif isinstance(mapping, str): - result.append(mapping) - return result - - -@typechecked -def translate_projection_to_native( - dmm: dict, - entity_type: Optional[str], - attrs: Optional[list], - # TODO: optional str or callable for joining entity_type and attr? -) -> list: - result = [] - if entity_type: - dmm = dmm[entity_type] - if not attrs: - for native_field, mapping in reverse_mapping(dmm).items(): - result.extend( - [(native_field, i) for i in _get_from_mapping(mapping, "ocsf_field")] - ) - attrs = [] - for attr in attrs: - mapping = dmm.get(attr) - if not mapping: - parts = attr.split(".") - tmp = dmm - for part in parts: - if isinstance(tmp, dict): - tmp = tmp.get(part, {}) - else: - break - if tmp: - mapping = tmp - if mapping: - result.extend( - [(i, attr) for i in _get_from_mapping(mapping, "native_field")] - ) - else: - # Pass-through? - result.append((attr, attr)) # FIXME: raise exception instead? 
- _logger.debug("proj_to_native: return %s", result) - return result - - -@typechecked -def translate_dataframe(df: DataFrame, dmm: dict) -> DataFrame: - # Translate results into Kestrel OCSF data model - # The column names of df are already mapped - df = df.replace({np.nan: None}) - for col in df.columns: - mapping = dpath.get(dmm, col, separator=".") - if isinstance(mapping, dict): - transformer_name = mapping.get("ocsf_value") - df[col] = run_transformer_on_series(transformer_name, df[col]) - return df diff --git a/packages-nextgen/kestrel_core/src/kestrel/mapping/entityattribute/ecs.yaml b/packages-nextgen/kestrel_core/src/kestrel/mapping/entityattribute/ecs.yaml deleted file mode 100644 index d4a1bf75..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/mapping/entityattribute/ecs.yaml +++ /dev/null @@ -1,233 +0,0 @@ -# https://schema.ocsf.io/1.1.0/objects/file -file: - accessed_time: file.accessed - attributes: file.attributes - created_time: file.created - # This "hashes" notation comes from jmespath (filter projection) - # It's much easier to use the ECS notation in this case - hashes[?algorithm_id == 1]: - value: hash.md5 - hashes[?algorithm_id == 2]: - value: hash.sha1 - hashes[?algorithm_id == 3]: - value: hash.sha256 - hashes[?algorithm_id == 4]: - value: hash.sha512 - hashes[?algorithm_id == 5]: - value: hash.ssdeep - hashes[?algorithm_id == 6]: - value: hash.tlsh - hashes[*]: - value: - - hash.md5 - - hash.sha1 - - hash.sha256 - - hash.sha512 - - hash.ssdeep - - hash.tlsh - modified_time: file.ctime - mime_type: file.mime_type - mode: file.mode - modified_time: file.mtime - name: file.name - owner: file.owner - parent_folder: file.directory - path: file.path - size: file.size - type: file.type - xattributes: - primary_group: file.gid - link_name: file.target_path - - -# https://schema.ocsf.io/1.1.0/objects/group -group: - domain: group.domain - name: group.name - uid: group.id - - -# https://schema.ocsf.io/1.1.0/objects/process -process: - cmd_line: process.command_line - name: process.name - pid: process.pid - uid: process.entity_id - file: - name: - native_field: process.executable - native_op: LIKE - native_value: endswith - ocsf_value: basename - path: process.executable - parent_folder: - native_field: process.executable - native_op: LIKE - native_value: startswith - ocsf_value: dirname - # This "hashes" notation comes from jmespath (filter projection) - # It's much easier to use the ECS notation in this case - hashes[?algorithm_id == 1]: - value: process.hash.md5 - hashes[?algorithm_id == 2]: - value: process.hash.sha1 - hashes[?algorithm_id == 3]: - value: process.hash.sha256 - hashes[?algorithm_id == 4]: - value: process.hash.sha512 - hashes[?algorithm_id == 5]: - value: process.hash.ssdeep - hashes[?algorithm_id == 6]: - value: process.hash.tlsh - hashes[*]: - value: - - process.hash.md5 - - process.hash.sha1 - - process.hash.sha256 - - process.hash.sha512 - - process.hash.ssdeep - - process.hash.tlsh - parent_process: - cmd_line: process.parent.command_line - name: process.parent.name - pid: process.parent.pid - uid: process.parent.entity_id - file: - name: - native_field: process.parent.executable - native_op: LIKE - native_value: endswith - ocsf_value: basename - path: process.parent.executable - parent_folder: - native_field: process.parent.executable - native_op: LIKE - native_value: startswith - ocsf_value: dirname - - -# src_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint -src_endpoint: &src_ref - domain: - - client.domain - - source.domain - 
hostname: - - client.domain - - source.domain - ip: - - client.ip - - source.ip - mac: - - client.mac - - source.mac - port: - - client.port - - source.port - - -# endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint -endpoint: - domain: - - client.domain - - source.domain - - server.domain - - destination.domain - hostname: - - client.domain - - source.domain - - server.domain - - destination.domain - ip: - - client.ip - - source.ip - - server.ip - - destination.ip - mac: - - client.mac - - source.mac - - server.mac - - destination.mac - port: - - client.port - - source.port - - server.port - - destination.port - - -# dst_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint -dst_endpoint: &dst_ref - domain: - - server.domain - - destination.domain - hostname: - - server.domain - - destination.domain - ip: - - server.ip - - destination.ip - mac: - - server.mac - - destination.mac - port: - - server.port - - destination.port - - -# https://schema.ocsf.io/1.1.0/objects/network_traffic -# should be `network_traffic`? -traffic: &traffic - bytes: network.bytes - bytes_in: - - destination.bytes - - server.bytes - bytes_out: - - client.bytes - - source.bytes - packets: network.packets - packets_in: - - destination.packets - - server.packets - packets_out: - - client.packets - - source.packets - - -# https://schema.ocsf.io/1.1.0/objects/network_connection_info -connection_info: - direction: network.direction #TODO: need transformer? - protocol_num: network.iana_number - protocol_name: network.transport - protocol_ver: network.type - protocol_ver_id: - native_field: network.type - native_value: ip_version_to_network_layer - ocsf_value: network_layer_to_ip_version - - -# https://schema.ocsf.io/1.1.0/objects/certificate -certificate: - expiration_time: x509.not_after - created_time: x509.not_before - serial_number: x509.serial_number - fingerprints[*]: - algorithm: x509.signature_algorithm - version: x509.version_number - issuer: x509.issuer.distinguished_name - subject: x509.subject.distinguished_name - #uid: - - -# https://schema.ocsf.io/1.1.0/objects/user -user: - domain: user.domain - full_name: user.full_name - name: user.name - uid: user.id - - -# https://schema.ocsf.io/1.1.0/classes/network_activity -# Network Activity [4001] Class -network_activity: - src_endpoint: *src_ref - dst_endpoint: *dst_ref - traffic: *traffic diff --git a/packages-nextgen/kestrel_core/src/kestrel/mapping/entityattribute/stix.yaml b/packages-nextgen/kestrel_core/src/kestrel/mapping/entityattribute/stix.yaml deleted file mode 100644 index 7082e6dd..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/mapping/entityattribute/stix.yaml +++ /dev/null @@ -1,143 +0,0 @@ -# https://schema.ocsf.io/1.1.0/objects/file -file: - name: file:name - size: file:size - accessed_time: file:accessed - created_time: file:created - modified_time: file:modified - # This "hashes" notation comes from jmespath (filter projection) - # It's much easier to use the ECS notation in this case - hashes[?algorithm_id == 1]: - value: file:hashes.MD5 - hashes[?algorithm_id == 2]: - value: "file:hashes.'SHA-1'" - hashes[?algorithm_id == 3]: - value: "file:hashes.'SHA-256'" - hashes[?algorithm_id == 4]: - value: "file:hashes.'SHA-512'" - hashes[?algorithm_id == 5]: - value: file:hashes.SSDEEP - hashes[?algorithm_id == 6]: - value: file:hashes.TLSH - hashes[*]: - value: - - file:hashes.MD5 - - "file:hashes.'SHA-1'" - - "file:hashes.'SHA-256'" - - "file:hashes.'SHA-512'" - - file:hashes.SSDEEP - - file:hashes.TLSH - - -# 
https://schema.ocsf.io/1.1.0/objects/group -# group: -# domain: -# name: -# uid: - - -# https://schema.ocsf.io/1.1.0/objects/process -process: - cmd_line: process:command_line - name: process:name - pid: process:pid - uid: process:x_unique_id - file: - name: process:binary_ref.name - parent_folder: process:binary_ref.parent_directory_ref.path - # This "hashes" notation comes from jmespath (filter projection) - # It's much easier to use the ECS notation in this case - hashes[?algorithm_id == 1]: - value: process:binary_ref.hashes.MD5 - hashes[?algorithm_id == 2]: - value: process:binary_ref.hashes.'SHA-1' - hashes[?algorithm_id == 3]: - value: process:binary_ref.hashes.'SHA-256' - hashes[?algorithm_id == 4]: - value: process:binary_ref.hashes.'SHA-512' - hashes[?algorithm_id == 5]: - value: process:binary_ref.hashes.SSDEEP - hashes[?algorithm_id == 6]: - value: process:binary_ref.hashes.TLSH - hashes[*]: - value: - - process:binary_ref.hashes.MD5 - - process:binary_ref.hashes.'SHA-1' - - process:binary_ref.hashes.'SHA-256' - - process:binary_ref.hashes.'SHA-512' - - process:binary_ref.hashes.SSDEEP - - process:binary_ref.hashes.TLSH - parent_process: - cmd_line: process:parent_ref.command_line - name: process:parent_ref.name - pid: process:parent_ref.pid - uid: process:parent_ref.x_unique_id - file: - name: process:parent_ref.binary_ref.name - parent_folder: process:parent_ref.binary_ref.parent_directory_ref.path - - -# dst_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint -dst_endpoint: - ip: - - network-traffic:dst_ref.value - - ipv4-addr:value - port: network-traffic:dst_port - - -# src_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint -src_endpoint: - ip: - - network-traffic:src_ref.value - - ipv4-addr:value - port: network-traffic:src_port - - -# https://schema.ocsf.io/1.1.0/objects/endpoint -endpoint: - ip: ipv4-addr:value - - -# https://schema.ocsf.io/1.1.0/objects/device -device: - ip: ipv4-addr:value - - -# https://schema.ocsf.io/1.1.0/objects/network_traffic -traffic: # should be `network_traffic`? - #TODO: bytes: sum of byte counts? - bytes_in: network-traffic:dst_byte_count - bytes_out: network-traffic:src_byte_count - #TODO: packets: sum of packet counts? 
- packets_in: network-traffic:dst_packets - packets_out: network-traffic:src_packets - - -# https://schema.ocsf.io/1.1.0/objects/network_connection_info -# connection_info: -# direction: -# protocol_num: -# protocol_name: -# protocol_ver: -# protocol_ver_id: - - -# https://schema.ocsf.io/1.1.0/objects/certificate -certificate: - expiration_time: x509-certificate:validity_not_after - created_time: x509-certificate:validity_not_before - serial_number: x509-certificate:serial_number - fingerprints[*]: - algorithm: x509-certificate:signature_algorithm - version: x509-certificate:version_number - issuer: x509-certificate:issuer - subject: x509-certificate:subject - #uid: - - -# https://schema.ocsf.io/1.1.0/objects/user -user: - full_name: user-account:display_name - name: user-account:account_login - type: user-account:account_type - uid: user-account:user_id diff --git a/packages-nextgen/kestrel_core/src/kestrel/mapping/entityname/alias.yaml b/packages-nextgen/kestrel_core/src/kestrel/mapping/entityname/alias.yaml deleted file mode 100644 index 306b557a..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/mapping/entityname/alias.yaml +++ /dev/null @@ -1,3 +0,0 @@ -event: base_event -activity: base_event - diff --git a/packages-nextgen/kestrel_core/src/kestrel/mapping/entityname/ecs.yaml b/packages-nextgen/kestrel_core/src/kestrel/mapping/entityname/ecs.yaml deleted file mode 100644 index 8d06636e..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/mapping/entityname/ecs.yaml +++ /dev/null @@ -1,9 +0,0 @@ -process: process -file: file -group: group -client: network_endpoint -destination: network_endpoint -server: network_endpoint -source: network_endpoint -network: network_activity -user: user diff --git a/packages-nextgen/kestrel_core/src/kestrel/mapping/entityname/stix.yaml b/packages-nextgen/kestrel_core/src/kestrel/mapping/entityname/stix.yaml deleted file mode 100644 index cd80756a..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/mapping/entityname/stix.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# TODO mapping for artifact: -autonomous-system: organization -directory: file -domain-name: endpoint -email-addr: user -email-message: email -file: file -ipv4-addr: endpoint -ipv6-addr: endpoint -mac-addr: endpoint -network-traffic: network_activity -process: process -software: product -url: http_request -user-account: user -x-ibm-finding: security_finding -x-ibm-ttp-tagging: attack -x-oca-asset: device -x-oca-event: base_event -x509-certificate: certificate -windows-registry-key: win/registry_key diff --git a/packages-nextgen/kestrel_core/src/kestrel/mapping/transformers.py b/packages-nextgen/kestrel_core/src/kestrel/mapping/transformers.py deleted file mode 100644 index 82202dcb..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/mapping/transformers.py +++ /dev/null @@ -1,110 +0,0 @@ -"""Kestrel Data Model Map value transformers""" - -from datetime import datetime, timezone -from typing import Callable - -from pandas import Series - - -# Dict of "registered" transformers -_transformers = {} - - -def transformer(func: Callable) -> Callable: - """A decorator for registering a transformer""" - _transformers[func.__name__] = func - return func - - -@transformer -def to_epoch_ms(value: str) -> int: - """Convert a time value to milliseconds since the epoch""" - if "." 
in value: - time_pattern = "%Y-%m-%dT%H:%M:%S.%fZ" - else: - time_pattern = "%Y-%m-%dT%H:%M:%SZ" - dt = datetime.strptime(value, time_pattern).replace(tzinfo=timezone.utc) - return int(dt.timestamp() * 1000) - - -@transformer -def dirname(path: str) -> str: # TODO: rename to winpath_dirname? - """Get the directory part of `path`""" - path_dir, _, _ = path.rpartition("\\") - return path_dir - - -@transformer -def basename(path: str) -> str: # TODO: rename to winpath_basename? - """Get the filename part of `path`""" - _, _, path_file = path.rpartition("\\") - return path_file - - -@transformer -def startswith(value: str) -> str: # TODO: rename to winpath_startswith? - return f"{value}\\%" - - -@transformer -def endswith(value: str) -> str: # TODO: rename to winpath_endswith? - return f"%\\{value}" - - -@transformer -def to_int(value) -> int: - """Ensure `value` is an int""" - try: - return int(value) - except ValueError: - # Maybe it's a hexadecimal string? - return int(value, 16) - - -@transformer -def to_str(value) -> str: - """Ensure `value` is a str""" - return str(value) - - -@transformer -def ip_version_to_network_layer(value: int) -> str: - if value == 4: - return "ipv4" - elif value == 6: - return "ipv6" - elif value == 99: - return "other" - return "unknown" - - -@transformer -def network_layer_to_ip_version(val: str) -> int: - value = val.lower() - if value == "ipv4": - return 4 - elif value == "ipv6": - return 6 - elif value == "other": - return 99 - return 0 - - -def run_transformer(transformer_name: str, value): - """Run the registered transformer with name `transformer_name` on `value`""" - func = _transformers.get(transformer_name) - if func: - result = func(value) - else: - raise NameError(transformer_name) - return result - - -def run_transformer_on_series(transformer_name: str, value: Series): - """Run the registered transformer with name `transformer_name` on `value`""" - func = _transformers.get(transformer_name) - if func: - result = value.apply(func) - else: - raise NameError(transformer_name) - return result diff --git a/packages-nextgen/kestrel_core/src/kestrel/session.py b/packages-nextgen/kestrel_core/src/kestrel/session.py deleted file mode 100644 index 48ebf1f8..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/session.py +++ /dev/null @@ -1,130 +0,0 @@ -import logging -from contextlib import AbstractContextManager -from uuid import UUID, uuid4 -from typing import Iterable -from typeguard import typechecked - -from kestrel.display import Display, GraphExplanation -from kestrel.ir.graph import IRGraph -from kestrel.ir.instructions import Instruction, Explain -from kestrel.frontend.parser import parse_kestrel -from kestrel.cache import AbstractCache, SqliteCache -from kestrel.config.internal import CACHE_INTERFACE_IDENTIFIER -from kestrel.interface import AbstractInterface, InterfaceManager -from kestrel.exceptions import InstructionNotFound - - -_logger = logging.getLogger(__name__) - - -@typechecked -class Session(AbstractContextManager): - """Kestrel huntflow execution session""" - - def __init__(self): - self.session_id = uuid4() - self.irgraph = IRGraph() - - # load all interfaces; cache is a special interface - cache = SqliteCache() - self.interface_manager = InterfaceManager([cache]) - - def execute(self, huntflow_block: str) -> Iterable[Display]: - """Execute a Kestrel huntflow block.
- - Execute a Kestrel statement or multiple consecutive statements (a - huntflow block). This method has the context of already executed - huntflow blocks in this session, so all existing variables can be - referred to in the new huntflow block. - - Parameters: - huntflow_block: the new huntflow block to be executed - - Returns: - Evaluated result per Return instruction - """ - return list(self.execute_to_generate(huntflow_block)) - - def execute_to_generate(self, huntflow_block: str) -> Iterable[Display]: - """Execute a Kestrel huntflow block and yield results as a generator. - - Parameters: - huntflow_block: the new huntflow block to be executed - - Yields: - Evaluated result per Return instruction - """ - irgraph_new = parse_kestrel(huntflow_block) - self.irgraph.update(irgraph_new) - - for ret in irgraph_new.get_returns(): - yield self.evaluate_instruction(ret) - - def evaluate_instruction(self, ins: Instruction) -> Display: - """Evaluate a single Instruction. - - Parameters: - ins: the instruction to evaluate - - Returns: - Evaluated result (Kestrel Display object) - """ - if ins not in self.irgraph: - raise InstructionNotFound(ins.to_dict()) - - pred = self.irgraph.get_trunk_n_branches(ins)[0] - is_explain = isinstance(pred, Explain) - display = GraphExplanation([]) - - _interface_manager = ( - self.interface_manager.copy_with_virtual_cache() - if is_explain - else self.interface_manager - ) - _cache = _interface_manager[CACHE_INTERFACE_IDENTIFIER] - - # The current logic evaluates dependent subgraphs on non-cache - # interfaces first, caches their results, and lastly evaluates in the cache. - # TODO: may evaluate cache first, then push dependent variables to the - # last interface to eval; this requires priority of interfaces - while True: - for g in self.irgraph.find_dependent_subgraphs_of_node(ins, _cache): - interface = _interface_manager[g.interface] - for iid, _display in ( - interface.explain_graph(g) - if is_explain - else interface.evaluate_graph(g) - ).items(): - if is_explain: - display.graphlets.append(_display) - else: - display = _display - if interface is not _cache: - _cache[iid] = display - if iid == ins.id: - return display - - def do_complete(self, huntflow_block: str, cursor_pos: int): - """Kestrel code auto-completion. - - Parameters: - huntflow_block: Kestrel code - cursor_pos: the position to start completion (index in ``huntflow_block``) - - Returns: - A list of suggested strings to complete the code - """ - raise NotImplementedError() - - def close(self): - """Explicitly close the session. - - This may be executed by a context manager or when the program exits.
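        A sketch of the usage pattern that triggers this cleanup
        (illustrative addition, not from the original file;
        ``huntflow_block`` stands for any Kestrel code)::

            with Session() as session:
                for display in session.execute(huntflow_block):
                    ...  # render each Display object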
- """ - # Note there are two conditions that trigger this function, so it is probably executed twice - # Be careful to write the logic in this function to avoid deleting nonexist files/dirs - if CACHE_INTERFACE_IDENTIFIER in self.interface_manager: - self.interface_manager.del_cache() - - def __exit__(self, exception_type, exception_value, traceback): - self.close() diff --git a/packages-nextgen/kestrel_core/src/kestrel/utils.py b/packages-nextgen/kestrel_core/src/kestrel/utils.py deleted file mode 100644 index 02cbb5b3..00000000 --- a/packages-nextgen/kestrel_core/src/kestrel/utils.py +++ /dev/null @@ -1,77 +0,0 @@ -import collections.abc -from importlib import resources -from kestrel.__future__ import is_python_older_than_minor_version -import os -from pathlib import Path -from pkgutil import get_data -from typeguard import typechecked -from typing import Optional, Mapping, Iterable - - -@typechecked -def load_data_file(package_name: str, file_name: str) -> str: - try: - # resources.files() is introduced in Python 3.9 - content = resources.files(package_name).joinpath(file_name).read_text() - except AttributeError: - # Python 3.8; deprecation warning forward - if is_python_older_than_minor_version(9): - content = get_data(package_name, file_name).decode("utf-8") - - return content - - -@typechecked -def list_folder_files( - package_name: str, - folder_name: str, - prefix: Optional[str] = None, - extension: Optional[str] = None, -) -> Iterable[str]: - # preprocesss extension to add dot it not there - if extension and extension[0] != ".": - extension = "." + extension - try: - file_paths = resources.files(package_name).joinpath(folder_name).iterdir() - except AttributeError: - if is_python_older_than_minor_version(9): - import pkg_resources - - file_names = pkg_resources.resource_listdir(package_name, folder_name) - file_paths = [ - Path( - pkg_resources.resource_filename( - package_name, os.path.join(folder_name, filename) - ) - ) - for filename in file_names - ] - file_list = ( - f - for f in file_paths - if ( - f.is_file() - and (f.name.endswith(extension) if extension else True) - and (f.name.startswith(prefix) if prefix else True) - ) - ) - return file_list - - -@typechecked -def unescape_quoted_string(s: str) -> str: - if s.startswith("r"): - return s[2:-1] - else: - return s[1:-1].encode("utf-8").decode("unicode_escape") - - -@typechecked -def update_nested_dict(dict_old: Mapping, dict_new: Optional[Mapping]) -> Mapping: - if dict_new: - for k, v in dict_new.items(): - if isinstance(v, collections.abc.Mapping) and k in dict_old: - dict_old[k] = update_nested_dict(dict_old[k], v) - else: - dict_old[k] = v - return dict_old diff --git a/packages-nextgen/kestrel_core/tests/__init__.py b/packages-nextgen/kestrel_core/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_core/tests/test_cache_inmemory.py b/packages-nextgen/kestrel_core/tests/test_cache_inmemory.py deleted file mode 100644 index 1a0bb9ca..00000000 --- a/packages-nextgen/kestrel_core/tests/test_cache_inmemory.py +++ /dev/null @@ -1,121 +0,0 @@ -import pytest -from pandas import DataFrame -from uuid import uuid4 - -from kestrel.cache import InMemoryCache -from kestrel.cache.inmemory import InMemoryCacheVirtual -from kestrel.ir.graph import IRGraph, IRGraphEvaluable -from kestrel.frontend.parser import parse_kestrel - - -def test_inmemory_cache_set_get_del(): - c = InMemoryCache() - idx = uuid4() - df = DataFrame([1, 2, 3]) - c[idx] = df - assert df.equals(c[idx]) - del 
c[idx] - assert idx not in c - - -def test_inmemory_cache_constructor(): - ids = [uuid4() for i in range(5)] - df = DataFrame([1, 2, 3]) - c = InMemoryCache({x:df for x in ids}) - for u in ids: - assert df.equals(c[u]) - for u in ids: - del c[u] - assert u not in c - - -def test_eval_new_filter_disp(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name = 'firefox.exe' OR name = 'chrome.exe' -DISP browsers ATTR name, pid -""" - graph = IRGraphEvaluable(parse_kestrel(stmt)) - c = InMemoryCache() - mapping = c.evaluate_graph(graph) - - # check the return is correct - rets = graph.get_returns() - assert len(rets) == 1 - df = mapping[rets[0].id] - assert df.to_dict("records") == [ {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - # check whether `proclist` is cached - proclist = graph.get_variable("proclist") - assert c[proclist.id].to_dict("records") == [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - # check whether `browsers` is cached - browsers = graph.get_variable("browsers") - assert c[browsers.id].to_dict("records") == [ {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - - -def test_eval_filter_with_ref(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name = 'firefox.exe' OR name = 'chrome.exe' -specials = proclist WHERE pid IN [123, 201] -p2 = proclist WHERE pid = browsers.pid and name = specials.name -DISP p2 ATTR name, pid -""" - graph = IRGraphEvaluable(parse_kestrel(stmt)) - c = InMemoryCache() - mapping = c.evaluate_graph(graph) - - # check the return is correct - rets = graph.get_returns() - assert len(rets) == 1 - df = mapping[rets[0].id] - assert df.to_dict("records") == [ {"name": "firefox.exe", "pid": 201} ] - -def test_get_virtual_copy(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name = 'firefox.exe' OR name = 'chrome.exe' -""" - graph = IRGraphEvaluable(parse_kestrel(stmt)) - c = InMemoryCache() - mapping = c.evaluate_graph(graph) - v = c.get_virtual_copy() - new_entry = uuid4() - v[new_entry] = True - - # v[new_entry] calls the right method - assert isinstance(v, InMemoryCacheVirtual) - assert v[new_entry].startswith("virtual") - - # v[new_entry] does not hit v.cache - assert len(c.cache) == 2 - assert len(v.cache) == 2 - - # the two cache_catalog are different - assert new_entry not in c - assert new_entry in v - del v[new_entry] - assert new_entry not in v - for u in c: - del v[u] - assert len(v) == 0 - assert len(c) == 2 diff --git a/packages-nextgen/kestrel_core/tests/test_cache_sqlite.py b/packages-nextgen/kestrel_core/tests/test_cache_sqlite.py deleted file mode 100644 index 5db07fb6..00000000 --- a/packages-nextgen/kestrel_core/tests/test_cache_sqlite.py +++ /dev/null @@ -1,183 +0,0 @@ -from uuid import uuid4 -from pandas import DataFrame - -from kestrel.cache import SqliteCache -from kestrel.cache.sqlite import SqliteCacheVirtual -from kestrel.ir.graph import IRGraphEvaluable 
-from kestrel.frontend.parser import parse_kestrel - - -def test_sqlite_cache_set_get_del(): - c = SqliteCache() - idx = uuid4() - df = DataFrame({'foo': [1, 2, 3]}) - c[idx] = df - assert df.equals(c[idx]) - del c[idx] - assert idx not in c - - -def test_sqlite_cache_constructor(): - ids = [uuid4() for i in range(5)] - df = DataFrame({'foo': [1, 2, 3]}) - c = SqliteCache({x:df for x in ids}) - for u in ids: - assert df.equals(c[u]) - for u in ids: - del c[u] - assert u not in c - - -def test_eval_new_disp(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -DISP proclist ATTR name -""" - graph = IRGraphEvaluable(parse_kestrel(stmt)) - c = SqliteCache() - mapping = c.evaluate_graph(graph) - - # check the return is correct - rets = graph.get_returns() - assert len(rets) == 1 - df = mapping[rets[0].id] - assert df.to_dict("records") == [ {"name": "cmd.exe"} - , {"name": "explorer.exe"} - , {"name": "firefox.exe"} - , {"name": "chrome.exe"} - ] - - -def test_eval_new_filter_disp(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name = 'firefox.exe' OR name = 'chrome.exe' -DISP browsers ATTR name, pid -""" - graph = IRGraphEvaluable(parse_kestrel(stmt)) - c = SqliteCache() - mapping = c.evaluate_graph(graph) - - # check the return is correct - rets = graph.get_returns() - assert len(rets) == 1 - df = mapping[rets[0].id] - assert df.to_dict("records") == [ {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - - -def test_eval_two_returns(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name != "cmd.exe" -DISP browsers -DISP browsers ATTR pid -""" - graph = parse_kestrel(stmt) - c = SqliteCache() - rets = graph.get_returns() - - # first DISP - gs = graph.find_dependent_subgraphs_of_node(rets[0], c) - assert len(gs) == 1 - mapping = c.evaluate_graph(gs[0]) - df1 = DataFrame([ {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ]) - assert len(mapping) == 1 - assert df1.equals(mapping[rets[0].id]) - - # second DISP - gs = graph.find_dependent_subgraphs_of_node(rets[1], c) - assert len(gs) == 1 - mapping = c.evaluate_graph(gs[0]) - df2 = DataFrame([ {"pid": 99} - , {"pid": 201} - , {"pid": 205} - ]) - assert len(mapping) == 1 - assert df2.equals(mapping[rets[1].id]) - - -def test_issue_446(): - """The `WHERE name IN ...` below was raising `sqlalchemy.exc.StatementError: (builtins.KeyError) 'name_1'` - https://github.com/opencybersecurityalliance/kestrel-lang/issues/446 - """ - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name IN ("explorer.exe", "firefox.exe", "chrome.exe") -""" - graph = IRGraphEvaluable(parse_kestrel(stmt)) - c = SqliteCache() - _ = c.evaluate_graph(graph) - - -def test_eval_filter_with_ref(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", 
"pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name = 'firefox.exe' OR name = 'chrome.exe' -specials = proclist WHERE pid IN [123, 201] -p2 = proclist WHERE pid = browsers.pid and name = specials.name -DISP p2 ATTR name, pid -""" - graph = IRGraphEvaluable(parse_kestrel(stmt)) - c = SqliteCache() - mapping = c.evaluate_graph(graph) - - # check the return is correct - rets = graph.get_returns() - assert len(rets) == 1 - df = mapping[rets[0].id] - assert df.to_dict("records") == [ {"name": "firefox.exe", "pid": 201} ] - -def test_get_virtual_copy(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name = 'firefox.exe' OR name = 'chrome.exe' -""" - graph = IRGraphEvaluable(parse_kestrel(stmt)) - c = SqliteCache() - mapping = c.evaluate_graph(graph) - v = c.get_virtual_copy() - new_entry = uuid4() - v[new_entry] = True - - # v[new_entry] calls the right method - assert isinstance(v, SqliteCacheVirtual) - assert v[new_entry].endswith("v") - - # the two cache_catalog are different - assert new_entry not in c - assert new_entry in v - del v[new_entry] - assert new_entry not in v - for u in c: - del v[u] - assert len(v) == 0 - assert len(c) == 1 diff --git a/packages-nextgen/kestrel_core/tests/test_config.py b/packages-nextgen/kestrel_core/tests/test_config.py deleted file mode 100644 index 2fcec65a..00000000 --- a/packages-nextgen/kestrel_core/tests/test_config.py +++ /dev/null @@ -1,60 +0,0 @@ -import kestrel.config.utils as cfg -import os - - -def test_env_vars_in_config(): - - test_config = """--- -credentials: - username: $TEST_USER - password: $TEST_PASSWORD - """ - os.environ["TEST_USER"] = "test-user" - os.environ["TEST_PASSWORD"] = "test-password" - os.environ["KESTREL_CONFIG"] = os.path.join(os.sep, "tmp", "config.yaml") - - with open(os.getenv("KESTREL_CONFIG"), "w") as fp: - fp.write(test_config) - config = cfg.load_config() - assert config["credentials"]["username"] == "test-user" - assert config["credentials"]["password"] == "test-password" - - -def test_env_vars_in_config_overwrite(): - - test_config = """--- -credentials: - username: ${TEST_USER} - password: ${TEST_PASSWORD} -debug: - cache_directory_prefix: $KESTREL_CACHE_DIRECTORY_PREFIX - """ - os.environ["TEST_USER"] = "test-user" - os.environ["TEST_PASSWORD"] = "test-password" - os.environ["KESTREL_CONFIG"] = os.path.join(os.sep, "tmp", "config.yaml") - os.environ["KESTREL_CACHE_DIRECTORY_PREFIX"] = "Kestrel2.0-" - with open(os.getenv("KESTREL_CONFIG"), "w") as fp: - fp.write(test_config) - config = cfg.load_config() - assert config["credentials"]["username"] == "test-user" - assert config["credentials"]["password"] == "test-password" - assert config["debug"]["cache_directory_prefix"] == "Kestrel2.0-" - -def test_empty_env_var_in_config(): - test_config = """--- -credentials: - username: ${TEST_USER} - password: ${TEST_PASSWORD} -debug: - cache_directory_prefix: $I_DONT_EXIST - """ - os.environ["TEST_USER"] = "test-user" - os.environ["TEST_PASSWORD"] = "test-password" - os.environ["KESTREL_CONFIG"] = os.path.join(os.sep, "tmp", "config.yaml") - os.environ["KESTREL_CACHE_DIRECTORY_PREFIX"] = "Kestrel2.0-" - with open(os.getenv("KESTREL_CONFIG"), "w") as fp: - fp.write(test_config) - config = cfg.load_config() - assert config["credentials"]["username"] == "test-user" - assert config["credentials"]["password"] == 
"test-password" - assert config["debug"]["cache_directory_prefix"] == "$I_DONT_EXIST" \ No newline at end of file diff --git a/packages-nextgen/kestrel_core/tests/test_interface_datasource_codegen_dataframe.py b/packages-nextgen/kestrel_core/tests/test_interface_datasource_codegen_dataframe.py deleted file mode 100644 index 4f9f7507..00000000 --- a/packages-nextgen/kestrel_core/tests/test_interface_datasource_codegen_dataframe.py +++ /dev/null @@ -1,107 +0,0 @@ -import pytest -from pandas import DataFrame - -from kestrel.interface.codegen.dataframe import ( - evaluate_source_instruction, - evaluate_transforming_instruction, -) - -from kestrel.ir.instructions import ( - Construct, - Variable, - Filter, - Limit, - ProjectAttrs, -) - -from kestrel.frontend.parser import parse_kestrel - - -def test_evaluate_Construct(): - data = [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - ins = Construct(data) - df = evaluate_source_instruction(ins) - assert df.equals(DataFrame(data)) - - -def test_non_exist_eval(): - with pytest.raises(NotImplementedError): - evaluate_transforming_instruction(Variable("asdf"), DataFrame()) - - -def test_evaluate_Limit(): - data = [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - df = DataFrame(data) - dfx = evaluate_transforming_instruction(Limit(2), df) - assert dfx.equals(df.head(2)) - - -def test_evaluate_ProjectAttrs(): - data = [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - df = DataFrame(data) - dfx = evaluate_transforming_instruction(ProjectAttrs(["name"]), df) - assert dfx.equals(df[["name"]]) - - -def test_evaluate_Construct_Filter_ProjectAttrs(): - stmt = r""" -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name = 'firefox.exe' OR name = 'chrome.exe' -DISP browsers ATTR name, pid -p2 = proclist WHERE pid > 100 -p3 = proclist WHERE name LIKE "c%.exe" -p4 = proclist WHERE name MATCHES r"^c\w{2}\.exe" -""" - graph = parse_kestrel(stmt) - c = graph.get_nodes_by_type(Construct)[0] - df0 = evaluate_source_instruction(c) - assert df0.to_dict("records") == [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - - browsers = graph.get_variable("browsers") - ft = next(graph.predecessors(browsers)) - dfx = evaluate_transforming_instruction(ft, df0) - assert dfx.to_dict("records") == [ {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - proj = next(graph.successors(browsers)) - dfy = evaluate_transforming_instruction(proj, dfx) - assert dfx.to_dict("records") == [ {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - - ft = next(graph.predecessors(graph.get_variable("p2"))) - dfx = evaluate_transforming_instruction(ft, df0) - assert dfx.to_dict("records") == [ {"name": "cmd.exe", "pid": 123} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - - ft = next(graph.predecessors(graph.get_variable("p3"))) - dfx = evaluate_transforming_instruction(ft, df0) - assert dfx.to_dict("records") == [ {"name": 
"cmd.exe", "pid": 123} - , {"name": "chrome.exe", "pid": 205} - ] - - ft = next(graph.predecessors(graph.get_variable("p4"))) - dfx = evaluate_transforming_instruction(ft, df0) - assert dfx.to_dict("records") == [ {"name": "cmd.exe", "pid": 123} ] diff --git a/packages-nextgen/kestrel_core/tests/test_interface_datasource_codegen_sql.py b/packages-nextgen/kestrel_core/tests/test_interface_datasource_codegen_sql.py deleted file mode 100644 index 1cc3c46c..00000000 --- a/packages-nextgen/kestrel_core/tests/test_interface_datasource_codegen_sql.py +++ /dev/null @@ -1,82 +0,0 @@ -from datetime import datetime -from dateutil import parser - -from kestrel.interface.codegen.sql import SqlTranslator -from kestrel.ir.filter import ( - BoolExp, - ExpOp, - FComparison, - IntComparison, - ListOp, - ListComparison, - MultiComp, - NumCompOp, - StrCompOp, - StrComparison, - TimeRange, -) -from kestrel.ir.instructions import ( - DataSource, - Filter, - Limit, - Offset, - ProjectAttrs, - ProjectEntity, - Sort, -) - -# Use sqlite3 for testing -import sqlalchemy - -import pytest - - -def _dt(timestr: str) -> datetime: - return parser.parse(timestr) - - -def _time2string(ts: datetime) -> str: - return ts.strftime('%Y-%m-%dT%H:%M:%S.%f') - - -def _remove_nl(s): - return s.replace('\n', '') - - -@pytest.mark.parametrize( - "iseq, sql", [ - # Try a simple filter - ([Filter(IntComparison('foo', NumCompOp.GE, 0))], - "SELECT * FROM my_table WHERE foo >= ?"), - # Try a simple filter with sorting - ([Filter(IntComparison('foo', NumCompOp.GE, 0)), Sort('bar')], - "SELECT * FROM my_table WHERE foo >= ? ORDER BY bar DESC"), - # Simple filter plus time range - ([Filter(IntComparison('foo', NumCompOp.GE, 0), timerange=TimeRange(_dt('2023-12-06T08:17:00Z'), _dt('2023-12-07T08:17:00Z')))], - "SELECT * FROM my_table WHERE foo >= ? AND timestamp >= ? AND timestamp < ?"), - # sqlalchemy's sqlite dialect seems to always add the offset - ([Limit(3), ProjectAttrs(['foo', 'bar', 'baz']), Filter(StrComparison('foo', StrCompOp.EQ, 'abc'))], - "SELECT foo, bar, baz FROM my_table WHERE foo = ? LIMIT ? OFFSET ?"), - # Same as above but reverse order - ([Filter(StrComparison('foo', StrCompOp.EQ, 'abc')), ProjectAttrs(['foo', 'bar', 'baz']), Limit(3)], - "SELECT foo, bar, baz FROM my_table WHERE foo = ? LIMIT ? OFFSET ?"), - ([Filter(ListComparison('foo', ListOp.NIN, ['abc', 'def']))], - "SELECT * FROM my_table WHERE (foo NOT IN (__[POSTCOMPILE_foo_1]))"), # POSTCOMPILE is some SQLAlchemy-ism - ([Filter(StrComparison('foo', StrCompOp.MATCHES, '.*abc.*'))], - "SELECT * FROM my_table WHERE foo REGEXP ?"), - ([Filter(StrComparison('foo', StrCompOp.NMATCHES, '.*abc.*'))], - "SELECT * FROM my_table WHERE foo NOT REGEXP ?"), - ([Filter(MultiComp(ExpOp.OR, [IntComparison('foo', NumCompOp.EQ, 1), IntComparison('bar', NumCompOp.EQ, 1)]))], - "SELECT * FROM my_table WHERE foo = ? OR bar = ?"), - ([Filter(MultiComp(ExpOp.AND, [IntComparison('foo', NumCompOp.EQ, 1), IntComparison('bar', NumCompOp.EQ, 1)]))], - "SELECT * FROM my_table WHERE foo = ? AND bar = ?"), - ([Limit(1000), Offset(2000)], - "SELECT * FROM my_table LIMIT ? 
OFFSET ?"), - ] -) -def test_sql_translator(iseq, sql): - trans = SqlTranslator(sqlalchemy.dialects.sqlite.dialect(), _time2string, "timestamp", sqlalchemy.table("my_table")) - for i in iseq: - trans.add_instruction(i) - result = trans.result() - assert _remove_nl(str(result)) == sql diff --git a/packages-nextgen/kestrel_core/tests/test_ir_filter.py b/packages-nextgen/kestrel_core/tests/test_ir_filter.py deleted file mode 100644 index 1e248df8..00000000 --- a/packages-nextgen/kestrel_core/tests/test_ir_filter.py +++ /dev/null @@ -1,144 +0,0 @@ -import json - -from kestrel.frontend.parser import parse_kestrel -from kestrel.ir.filter import ( - IntComparison, FloatComparison, StrComparison, ListComparison, - RefComparison, ReferenceValue, ListOp, NumCompOp, StrCompOp, ExpOp, - BoolExp, MultiComp, get_references_from_exp, resolve_reference_with_function, -) -from kestrel.ir.instructions import ( - Filter, - instruction_from_json, -) - -import pytest - - -@pytest.mark.parametrize( - "field, op, value", [ - ("foo", StrCompOp.EQ, "bar"), - ("foo", NumCompOp.EQ, 42), - ("foo", NumCompOp.EQ, 3.14), - ("foo", StrCompOp.NEQ, "bar"), - ("foo", NumCompOp.NEQ, 42), - ("foo", NumCompOp.NEQ, 3.14), - ("foo", StrCompOp.LIKE, "%bar"), - ("foo", StrCompOp.NLIKE, "%bar"), - ] -) -def test_comparison(field, op, value): - if isinstance(value, int): - comp = IntComparison(field=field, op=op, value=value) - elif isinstance(value, float): - comp = FloatComparison(field=field, op=op, value=value) - else: - comp = StrComparison(field=field, op=op, value=value) - assert comp.field == field - assert comp.op == op - assert comp.value == value - json_data: str = comp.to_json() - data: dict = json.loads(json_data) - assert data["field"] == field - assert data["op"] == op - assert data["value"] == value - if isinstance(value, int): - comp2 = IntComparison.from_json(json_data) - elif isinstance(value, float): - comp2 = FloatComparison.from_json(json_data) - else: - comp2 = StrComparison.from_json(json_data) - assert comp == comp2 - - -@pytest.mark.parametrize( - "field, op, value", [ - ("foo", ListOp.IN, ["a", "b", "c"]), - ("foo", ListOp.NIN, ["a", "b", "c"]), - ("foo", ListOp.IN, [1, 2, 3]), - ("foo", ListOp.NIN, [1, 2, 3]), - ] -) -def test_list_comparison(field, op, value): - comp = ListComparison(field=field, op=op, value=value) - assert comp.field == field - assert comp.op == op - assert comp.value == value - json_data: str = comp.to_json() - data: dict = json.loads(json_data) - assert data["field"] == field - assert data["op"] == op - assert data["value"] == value - comp2 = ListComparison.from_json(json_data) - assert comp == comp2 - - - - -def test_multi_comparison(): - comp1 = StrComparison("foo", StrCompOp.EQ, "X") - comp2 = StrComparison("bar", StrCompOp.EQ, "Y") - comp3 = StrComparison("baz", StrCompOp.EQ, "Z") - mcomp = MultiComp(ExpOp.OR, [comp1, comp2, comp3]) - data = mcomp.to_json() - mcomp2 = MultiComp.from_json(data) - assert mcomp == mcomp2 - - -@pytest.mark.parametrize( - "lhs, op, rhs", [ - (StrComparison("foo", StrCompOp.EQ, "bar"), ExpOp.AND, IntComparison("baz", NumCompOp.EQ, 42)), - (StrComparison("foo", StrCompOp.LIKE, "%bar%"), ExpOp.OR, IntComparison("baz", NumCompOp.LE, 42)), - (IntComparison("baz", NumCompOp.GE, 42), ExpOp.AND, StrComparison("foo", StrCompOp.NEQ, "bar")), - (IntComparison("baz", NumCompOp.NEQ, 42), ExpOp.OR, StrComparison("foo", StrCompOp.EQ, "bar")), - (StrComparison("foo", StrCompOp.EQ, "bar"), ExpOp.AND, ListComparison("baz", ListOp.IN, ["a", "b", "c"])), - 
(StrComparison("foo", StrCompOp.EQ, "bar"), ExpOp.OR, ListComparison("baz", ListOp.IN, [1, 2, 3])), - (ListComparison("baz", ListOp.IN, ["a", "b", "c"]), ExpOp.AND, StrComparison("foo", StrCompOp.EQ, "bar")), - (ListComparison("baz", ListOp.IN, [1, 2, 3]), ExpOp.OR, StrComparison("foo", StrCompOp.EQ, "bar")), - (StrComparison("foo", StrCompOp.EQ, "X"), ExpOp.AND, - MultiComp(ExpOp.OR, [StrComparison("bar", StrCompOp.EQ, "A"), StrComparison("baz", StrCompOp.EQ, "B")])), - ] -) -def test_bool_exp(lhs, op, rhs): - exp = BoolExp(lhs, op, rhs) - data = exp.to_json() - exp2 = BoolExp.from_json(data) - assert exp == exp2 - - # Also test Filter - filt = Filter(exp) - data = filt.to_json() - filt2 = instruction_from_json(data) - assert filt == filt2 - - -def test_filter_compound_exp(): - comp1 = StrComparison("foo", StrCompOp.EQ, "bar") - comp2 = IntComparison("baz", NumCompOp.EQ, 42) - exp1 = BoolExp(comp1, ExpOp.AND, comp2) - comp3 = StrComparison("thing1", StrCompOp.NEQ, "abc") - comp4 = ListComparison("thing2", ListOp.IN, [1, 2, 3]) - exp2 = BoolExp(comp3, ExpOp.OR, comp4) - exp3 = BoolExp(exp1, ExpOp.AND, exp2) - filt = Filter(exp3) - data = filt.to_json() - filt2 = instruction_from_json(data) - assert filt == filt2 - - -def test_filter_with_reference(): - stmt = "x = y WHERE foo = 'bar' OR baz = z.baz" - graph = parse_kestrel(stmt) - filter_nodes = graph.get_nodes_by_type(Filter) - exp = filter_nodes[0].exp - exp_dict = exp.to_dict() - assert exp_dict == {'lhs': {'field': 'foo', 'op': '=', 'value': 'bar'}, 'op': 'OR', 'rhs': {'field': 'baz', 'op': 'IN', 'value': {'reference': 'z', 'attribute': 'baz'}}} - - -def test_fill_references_in_exp(): - lhs = StrComparison("foo", StrCompOp.EQ, "bar") - rhs = RefComparison("baz", "=", ReferenceValue("var", "attr")) - exp = BoolExp(lhs, ExpOp.AND, rhs) - rs = get_references_from_exp(exp) - assert len(list(rs)) == 1 - resolve_reference_with_function(exp, lambda x: 5) - assert exp.rhs.value == 5 diff --git a/packages-nextgen/kestrel_core/tests/test_ir_graph.py b/packages-nextgen/kestrel_core/tests/test_ir_graph.py deleted file mode 100644 index cd77da7d..00000000 --- a/packages-nextgen/kestrel_core/tests/test_ir_graph.py +++ /dev/null @@ -1,406 +0,0 @@ -import pytest -import networkx.utils -from collections import Counter -from pandas import DataFrame - -from kestrel.ir.instructions import ( - Variable, - DataSource, - Reference, - Return, - Filter, - Construct, - ProjectAttrs, - ProjectEntity, - Instruction, - TransformingInstruction, - CACHE_INTERFACE_IDENTIFIER, -) -from kestrel.ir.filter import StrComparison, StrCompOp -from kestrel.ir.graph import IRGraph, IRGraphSimpleQuery -from kestrel.frontend.parser import parse_kestrel -from kestrel.cache import InMemoryCache - - -def test_add_get_datasource(): - g = IRGraph() - g.add_datasource("stixshifter://abc") - - s = g.add_datasource(DataSource("stixshifter://abc")) - assert len(g) == 1 - - s2 = DataSource("stixshifter://abcd") - g.add_datasource(s2) - assert len(g) == 2 - - assert set(g.get_datasources()) == {s, s2} - g.get_datasource("stixshifter", "abc") == s - - -def test_add_same_node(): - g = IRGraph() - n = Instruction() - s = g.add_node(n) - s = g.add_node(n) - assert len(g) == 1 - - -def test_get_node_by_id(): - g = IRGraph() - n = Instruction() - s = g.add_node(n) - assert g.get_node_by_id(n.id) == n - - -def test_get_nodes_by_type_and_attributes(): - g = IRGraph() - s = g.add_datasource("stixshifter://abc") - v1 = g.add_variable("asdf", s) - v2 = g.add_variable("qwer", s) - v3 = 
g.add_variable("123", s) - ns = g.get_nodes_by_type_and_attributes(Variable, {"name": "asdf"}) - assert ns == [v1] - - -def test_get_returns(): - g = IRGraph() - s = g.add_datasource("stixshifter://abc") - g.add_return(s) - g.add_return(s) - g.add_return(s) - rets = g.get_returns() - assert len(rets) == 3 - assert [ret.sequence for ret in rets] == [0, 1, 2] - assert len(g.get_sink_nodes()) == 3 - - -def test_add_variable(): - g = IRGraph() - s = g.add_datasource("stixshifter://abc") - v1 = g.add_variable("asdf", s) - assert len(g) == 2 - assert len(g.edges()) == 1 - - v2 = g.add_variable("asdf", s) - assert len(g) == 3 - assert len(g.edges()) == 2 - - v = Variable("asdf") - v3 = g.add_variable(v, s) - assert v == v3 - v4 = g.add_variable(v, s) - assert v3 == v4 - - assert v1.version == 0 - assert v2.version == 1 - assert v3.version == 2 - assert len(g) == 4 - assert len(g.edges()) == 3 - - -def test_get_variables(): - g = IRGraph() - s = g.add_datasource("stixshifter://abc") - v1 = g.add_variable("asdf", s) - v2 = g.add_variable("asdf", s) - v3 = g.add_variable("asdf", s) - vs = g.get_variables() - assert len(vs) == 1 - assert vs[0].name == "asdf" - - -def test_add_get_reference(): - g = IRGraph() - s = g.add_node(DataSource("ss://ee")) - g.add_node(Variable("asdf"), s) - g.add_node(Reference("asdf")) - q1 = g.add_node(Reference("qwer")) - q2 = g.add_node(Reference("qwer")) - g.add_node(Variable("qwer"), s) - g.add_node(Reference("qwer")) - assert len(g) == 4 - assert len(g.edges()) == 2 - - assert q1 == q2 - assert g.get_reference("qwer") == q1 - refs = g.get_references() - assert refs == [q1] - - -def test_copy_graph(): - g = IRGraph() - s = g.add_datasource("stixshifter://abc") - g2 = g.copy() - assert s in g2 - for n in g2.nodes(): - n.datasource = "eee" - assert s in g - assert s.datasource == "eee" - - -def test_deepcopy_graph(): - g = IRGraph() - s = g.add_datasource("stixshifter://abc") - g2 = g.deepcopy() - assert len(g2.nodes()) == 1 - s2 = list(g2.nodes())[0] - s2.datasource = "eee" - assert s.datasource == "abc" - assert s2.datasource == "eee" - - -def test_update_graph(): - g = IRGraph() - s = g.add_datasource("stixshifter://abc") - v1 = g.add_variable("asdf", s) - v2 = g.add_variable("asdf", s) - v3 = g.add_variable("asdf", s) - r1 = g.add_return(v3) - - g2 = IRGraph() - s2 = g2.add_datasource("stixshifter://abc") - v4 = g2.add_variable("asdf", g2.add_node(Reference("asdf"))) - v5 = g2.add_variable("asdf", g2.add_node(TransformingInstruction(), s2)) - r2 = g2.add_return(v5) - - assert v1.version == 0 - assert v2.version == 1 - assert v3.version == 2 - assert v4.version == 0 - assert v5.version == 1 - assert r1.sequence == 0 - assert r2.sequence == 0 - assert len(g) == 5 - assert len(g2) == 6 - - g.update(g2) - assert v1.version == 0 - assert v2.version == 1 - assert v3.version == 2 - assert v4.version == 3 - assert v5.version == 4 - assert r1.sequence == 0 - assert r2.sequence == 1 - assert len(g) == 9 - assert s2 not in g - assert r1 in g - assert r2 in g - assert not g.get_references() - assert (v3, v4) in g.edges() - assert g.in_degree(v4) == 1 - assert g.out_degree(v4) == 0 - - -def test_serialization_deserialization(): - g1 = IRGraph() - s = g1.add_node(DataSource("ss://ee")) - r = g1.add_node(Reference("asdf")) - v = g1.add_node(Variable("asdf"), s) - j = g1.to_json() - g2 = IRGraph(j) - assert s in g2.nodes() - assert v in g2.nodes() - assert len(g2) == 3 - assert g2.edges() == {(s,v)} - - -def test_find_cached_dependent_subgraph_of_node(): - g = IRGraph() - - a1 = 
g.add_node(DataSource("ss://ee")) - a2 = g.add_node(Variable("asdf"), a1) - a3 = g.add_node(Instruction()) - g.add_edge(a2, a3) - a4 = g.add_node(Variable("qwer"), a3) - - b1 = g.add_node(DataSource("ss://eee")) - b2 = g.add_node(Variable("asdfe"), b1) - b3 = g.add_node(Instruction()) - g.add_edge(b2, b3) - b4 = g.add_node(Variable("qwere"), b3) - - c1 = g.add_node(Instruction()) - g.add_edge(a4, c1) - g.add_edge(b4, c1) - c2 = g.add_node(Variable("zxcv"), c1) - - g2 = g.find_cached_dependent_subgraph_of_node(c2, InMemoryCache()) - assert networkx.utils.graphs_equal(g, g2) - - g3 = g.find_cached_dependent_subgraph_of_node(c2, InMemoryCache({a2.id: DataFrame(), b2.id: DataFrame()})) - g.remove_node(a1) - g.remove_node(b1) - assert networkx.utils.graphs_equal(g, g3) - - -def test_find_dependent_subgraphs_of_node_just_cache(): - huntflow = """ -p1 = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - -browsers = p1 WHERE name = 'firefox.exe' OR name = 'chrome.exe' - -DISP browsers ATTR name -""" - graph = parse_kestrel(huntflow) - c = InMemoryCache() - ret = graph.get_returns()[0] - gs = graph.find_dependent_subgraphs_of_node(ret, c) - assert len(gs) == 1 - assert len(gs[0]) == 6 - assert Counter(map(type, gs[0].nodes())) == Counter([Filter, Variable, Variable, Construct, ProjectAttrs, Return]) - assert gs[0].interface == CACHE_INTERFACE_IDENTIFIER - - -def test_get_trunk_n_branches_filter(): - stmt = "y = x WHERE name = z.name AND pid = w.pid" - graph = parse_kestrel(stmt) - trunk, r2n = graph.get_trunk_n_branches(graph.get_nodes_by_type(Filter)[0]) - assert trunk.name == "x" - for r,n in r2n.items(): - assert next(graph.predecessors(n)).name == r.reference - - -def test_get_trunk_n_branches_variable(): - huntflow = """ -p1 = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -""" - graph = parse_kestrel(huntflow) - trunk, r2n = graph.get_trunk_n_branches(graph.get_variable("p1")) - assert isinstance(trunk, Construct) - assert r2n == {} - - -def test_find_dependent_subgraphs_of_node(): - huntflow = """ -p1 = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - -browsers = p1 WHERE name = 'firefox.exe' OR name = 'chrome.exe' - -p2 = GET process FROM elastic://edr1 - WHERE name = "cmd.exe" - LAST 5 DAYS - -p21 = p2 WHERE parent.name = "winword.exe" - -p3 = GET process FROM stixshifter://edr2 - WHERE parent_ref.name = "powershell.exe" - LAST 24 HOURS - -p31 = p3 WHERE parent.name = "excel.exe" - -p4 = p21 WHERE pid = p1.pid -p5 = GET process FROM stixshifter://edr5 WHERE pid = p4.pid - -DISP p5 ATTR pid, name, cmd_line -""" - graph = parse_kestrel(huntflow) - - p1 = graph.get_variable("p1") - p2 = graph.get_variable("p2") - p3 = graph.get_variable("p3") - p21 = graph.get_variable("p21") - p31 = graph.get_variable("p31") - p4 = graph.get_variable("p4") - p5 = graph.get_variable("p5") - ret = graph.get_returns()[0] - - c = InMemoryCache() - gs = graph.find_dependent_subgraphs_of_node(ret, c) - assert len(gs) == 2 - p1_projattr = [n for n in graph.successors(p1) if isinstance(n, ProjectAttrs)][0] - assert len(gs[0]) == 3 - assert set(map(type, gs[0].nodes())) == {Variable, ProjectAttrs, Construct} - assert p1_projattr == 
gs[0].get_nodes_by_type(ProjectAttrs)[0] - assert len(gs[1]) == 6 - assert Counter(map(type, gs[1].nodes())) == Counter([Filter, Filter, Variable, Variable, ProjectEntity, DataSource]) - - c.evaluate_graph(gs[0]) - assert p1_projattr.id in c - assert p1.id in c - assert len(c) == 2 - gs = graph.find_dependent_subgraphs_of_node(ret, c) - assert len(gs) == 1 - assert len(gs[0]) == 11 - assert p2 in gs[0] - assert p21 in gs[0] - assert p4 in gs[0] - assert Counter(map(type, gs[0].nodes())) == Counter([Filter, Filter, Filter, Variable, Variable, Variable, Variable, ProjectEntity, DataSource, ProjectAttrs, ProjectAttrs]) - - p4_projattr = next(graph.successors(p4)) - c[p4_projattr.id] = DataFrame() - gs = graph.find_dependent_subgraphs_of_node(ret, c) - assert len(gs) == 1 - assert len(gs[0]) == 8 - assert p4_projattr.id in c - assert p4_projattr in gs[0] - assert p5 in gs[0] - assert ret in gs[0] - assert Counter(map(type, gs[0].nodes())) == Counter([Filter, Return, Variable, Variable, ProjectEntity, DataSource, ProjectAttrs, ProjectAttrs]) - - -def test_find_simple_query_subgraphs(): - huntflow = """ -p1 = GET process FROM elastic://edr1 - WHERE name = "cmd.exe" - LAST 5 DAYS - -p2 = GET process FROM elastic://edr1 - WHERE pid = 999 - LAST 30 MINUTES - -p3 = p1 WHERE pid = p2.pid - -p4 = GET process FROM elastic://edr2 WHERE name = p3.name - -DISP p4 -""" - graph = parse_kestrel(huntflow) - c = InMemoryCache() - gs = graph.find_dependent_subgraphs_of_node(graph.get_returns()[0], c) - assert len(gs) == 1 - assert networkx.utils.graphs_equal(graph, gs[0]) - - vs = set(["p1", "p2"]) - for g in gs[0].find_simple_query_subgraphs(c): - assert isinstance(g, IRGraphSimpleQuery) - assert Counter(map(type, g.nodes())) == Counter([Variable, Filter, ProjectEntity, DataSource]) - assert len(g.edges()) == 3 - varname = g.get_variables()[0].name - assert varname in vs - vs.remove(varname) - assert vs == set() - - p1 = gs[0].get_variable("p1") - c[p1.id] = DataFrame() - p2 = gs[0].get_variable("p2") - c[p2.id] = DataFrame() - - gs = graph.find_dependent_subgraphs_of_node(graph.get_returns()[0], c) - # just a dep graph in cache - assert len(gs) == 1 - assert Counter(map(type, gs[0].nodes())) == Counter([Variable, Variable, Filter, ProjectAttrs, ProjectAttrs, Variable]) - sinks = gs[0].get_sink_nodes() - assert len(sinks) == 1 - sink = sinks[0] - assert isinstance(sink, ProjectAttrs) and sink.attrs == ['name'] - c[sink.id] = DataFrame() - - gs = graph.find_dependent_subgraphs_of_node(graph.get_returns()[0], c) - assert len(gs) == 1 - assert sink in gs[0] - assert Counter(map(type, gs[0].nodes())) == Counter([Variable, Filter, ProjectAttrs, DataSource, Return, ProjectEntity, Variable]) - for g in gs[0].find_simple_query_subgraphs(c): - assert Counter(map(type, g.nodes())) == Counter([ProjectAttrs, Variable, Filter, ProjectEntity, DataSource]) - assert sink in g diff --git a/packages-nextgen/kestrel_core/tests/test_ir_instructions.py b/packages-nextgen/kestrel_core/tests/test_ir_instructions.py deleted file mode 100644 index f9a32410..00000000 --- a/packages-nextgen/kestrel_core/tests/test_ir_instructions.py +++ /dev/null @@ -1,103 +0,0 @@ -import pytest - -from kestrel.ir.instructions import ( - Variable, - DataSource, - Construct, - get_instruction_class, - instruction_from_dict, - instruction_from_json, - CACHE_INTERFACE_IDENTIFIER, -) -from kestrel.exceptions import ( - InvalidSeralizedInstruction, - InvalidDataSource, -) - - -def test_instruction_post_init(): - v = Variable("asdf") - j = v.to_dict() - 
assert "id" in j - assert "instruction" in j - assert j["instruction"] == "Variable" - - -def test_stable_id(): - v = Variable("asdf") - _id = v.id - v.name = "qwer" - assert v.id == _id - - -def test_stable_hash(): - s = DataSource("stixshifter://abc") - h1 = hash(s) - s.datasource = "abcd" - h2 = hash(s) - assert h1 == h2 - - -def test_eq(): - s1 = DataSource("stixshifter://abc") - s2 = DataSource("stixshifter://abc") - s3 = instruction_from_dict(s1.to_dict()) - assert s1 != s2 - assert s1 == s3 - - -def test_get_instruction_class(): - cls = get_instruction_class("Variable") - v = cls("asdf") - assert cls == Variable - assert isinstance(v, Variable) - - -def test_add_source(): - s = DataSource("stixshifter://abc") - j = s.to_dict() - assert j["interface"] == "stixshifter" - assert j["datasource"] == "abc" - assert "id" in j - assert "instruction" in j - assert "uri" not in j - assert "default_interface" not in j - - x = DataSource("abc", "stixshifter") - assert x.interface == "stixshifter" - assert x.datasource == "abc" - - with pytest.raises(InvalidDataSource): - DataSource("sss://eee://ccc") - - with pytest.raises(InvalidDataSource): - DataSource("sss") - - -def test_construct(): - data = [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - c = Construct(data) - assert c.data == data - assert c.interface == CACHE_INTERFACE_IDENTIFIER - - -def test_instruction_from_dict(): - v = Variable("asdf") - d = v.to_dict() - w = instruction_from_dict(d) - assert w == v - - del d["id"] - with pytest.raises(InvalidSeralizedInstruction): - instruction_from_dict(d) - - -def test_instruction_from_json(): - v = Variable("asdf") - j = v.to_json() - w = instruction_from_json(j) - assert w == v diff --git a/packages-nextgen/kestrel_core/tests/test_mapping_data_model.py b/packages-nextgen/kestrel_core/tests/test_mapping_data_model.py deleted file mode 100644 index 93abe83e..00000000 --- a/packages-nextgen/kestrel_core/tests/test_mapping_data_model.py +++ /dev/null @@ -1,200 +0,0 @@ -import pytest - -import pandas as pd - -from kestrel.mapping.data_model import ( - load_default_mapping, - reverse_mapping, - translate_comparison_to_native, - translate_comparison_to_ocsf, - translate_dataframe, - translate_projection_to_native, -) - - -# A "custom" mapping for an opensearch/elasticsearch datasource. -# This mapping works with data from Blue Team Village's 2023 DefCon CTF, for example. 
-WINLOGBEAT_MAPPING = { - "file": { - "path": "file.path", - "name": "file.name" - }, - "process": { - "cmd_line": "winlog.event_data.CommandLine", - "pid": { - "native_field": "winlog.event_data.ProcessId", - "native_value": "to_str", - "ocsf_value": "to_int" - }, - "uid": "winlog.event_data.ProcessGuid", - "file": { - "path": "winlog.event_data.Image", - "name": [ - { - "native_field": "winlog.event_data.Image", - "native_op": "LIKE", - "native_value": "endswith", - "ocsf_value": "basename" - } - ], - "parent_folder": [ - { - "native_field": "winlog.event_data.Image", - "native_op": "LIKE", - "native_value": "startswith", - "ocsf_value": "dirname" - } - ] - }, - "parent_process": { - "cmd_line": "winlog.event_data.ParentCommandLine", - "pid": "winlog.event_data.ParentProcessId", - "uid": "winlog.event_data.ParentProcessGuid", - "file": { - "path": "winlog.event_data.ParentImage", - "name": [ - { - "native_field": "winlog.event_data.ParentImage", - "native_op": "LIKE", - "native_value": "endswith", - "ocsf_value": "basename" - } - ], - "parent_folder": [ - { - "native_field": "winlog.event_data.ParentImage", - "native_op": "LIKE", - "native_value": "startswith", - "ocsf_value": "dirname" - } - ] - } - } - }, - "dst_endpoint": { - "ip": "winlog.event_data.DestinationIp", - "port": "winlog.event_data.DestinationPort" - }, - "src_endpoint": { - "ip": "winlog.event_data.SourceIp", - "port": "winlog.event_data.SourcePort" - } -} - - -# Simplified subset of the standard mapping -STIX_MAPPING = { - "device": { - "ip": "ipv4-addr:value" - }, - "endpoint": { - "ip": "ipv4-addr:value" - }, -} - - -# This mapping is used in 2 places: -# - frontend comparison from ECS to OCSF -# - backend comparison from OCSF to ECS (datasource) -ECS_MAPPING = load_default_mapping("ecs") - - -def test_reverse_mapping_ipv4(): - reverse_map = reverse_mapping(STIX_MAPPING) - ipv4 = reverse_map["ipv4-addr:value"] - assert isinstance(ipv4, list) - assert set(ipv4) == {"device.ip", "endpoint.ip"} - - -def test_reverse_mapping_executable(): - reverse_map = reverse_mapping(ECS_MAPPING) - exe = reverse_map["process.executable"] - assert isinstance(exe, list) - assert "process.file.path" in exe - for item in exe: - if isinstance(item, dict): - assert "ocsf_field" in item - if item["ocsf_field"] == "process.file.name": - # Make sure all metadata from the mapping got reversed - assert item["native_value"] == "endswith" - assert item["native_op"] == "LIKE" - assert item["ocsf_value"] == "basename" - - - -@pytest.mark.parametrize( - "dmm, field, op, value, expected_result", - [ - (WINLOGBEAT_MAPPING, "process.file.path", "=", "C:\\TMP\\foo.exe", - [("winlog.event_data.Image", "=", "C:\\TMP\\foo.exe")]), - (WINLOGBEAT_MAPPING, "process.file.name", "=", "foo.exe", - [("winlog.event_data.Image", "LIKE", "%\\foo.exe")]), - (ECS_MAPPING, "process.file.path", "=", "C:\\TMP\\foo.exe", - [("process.executable", "=", "C:\\TMP\\foo.exe")]), - (ECS_MAPPING, "process.file.name", "=", "foo.exe", - [("process.executable", "LIKE", "%\\foo.exe")]), - ], -) -def test_translate_comparison_to_native(dmm, field, op, value, expected_result): - assert translate_comparison_to_native(dmm, field, op, value) == expected_result - - -@pytest.mark.parametrize( - "dmm, field, op, value, expected_result", - [ - (ECS_MAPPING, "process.executable", "=", "C:\\TMP\\foo.exe", - [ - ("process.file.path", "=", "C:\\TMP\\foo.exe"), - ("process.file.name", "=", "foo.exe"), - ("process.file.parent_folder", "=", "C:\\TMP"), - ]), - (ECS_MAPPING, "process.executable", 
"LIKE", "%\\foo.exe", - [ - ("process.file.path", "LIKE", "%\\foo.exe"), - ("process.file.name", "LIKE", "foo.exe"), #TODO: could optimize this to "=" - ("process.file.parent_folder", "LIKE", "%"), #TODO: could eliminate this? - ]), - (STIX_MAPPING, "ipv4-addr:value", "=", "198.51.100.13", - [ - ("device.ip", "=", "198.51.100.13"), - ("endpoint.ip", "=", "198.51.100.13"), - ]), - ], -) -def test_translate_comparison_to_ocsf(dmm, field, op, value, expected_result): - """Test the translate function.""" - reverse_dmm = reverse_mapping(dmm) # Make the dmms fixtures? - assert set(translate_comparison_to_ocsf(reverse_dmm, field, op, value)) == set(expected_result) - - -@pytest.mark.parametrize( - "dmm, entity, field, expected_result", - [ - (WINLOGBEAT_MAPPING, "process", ["file.name", "pid"], - [("winlog.event_data.Image", "file.name"), ("winlog.event_data.ProcessId", "pid")]), - (WINLOGBEAT_MAPPING, "process", None, - [("winlog.event_data.CommandLine", "cmd_line"), - ("winlog.event_data.ProcessId", "pid"), - ("winlog.event_data.ProcessGuid", "uid"), - ("winlog.event_data.Image", "file.path"), - ("winlog.event_data.Image", "file.name"), - ("winlog.event_data.Image", "file.parent_folder"), - ("winlog.event_data.ParentCommandLine", "parent_process.cmd_line"), - ("winlog.event_data.ParentProcessId", "parent_process.pid"), - ("winlog.event_data.ParentProcessGuid", "parent_process.uid"), - ("winlog.event_data.ParentImage", "parent_process.file.path"), - ("winlog.event_data.ParentImage", "parent_process.file.name"), - ("winlog.event_data.ParentImage", "parent_process.file.parent_folder"), - ]), - ], -) -def test_translate_projection_to_native(dmm, entity, field, expected_result): - assert translate_projection_to_native(dmm, entity, field) == expected_result - - -def test_translate_dataframe(): #TODO: more testing here - df = pd.DataFrame({"file.path": [r"C:\Windows\System32\cmd.exe", r"C:\TMP"], - "pid": [1, 2]}) - dmm = load_default_mapping("ecs") - df = translate_dataframe(df, dmm["process"]) - #TODO:assert df["file.name"].iloc[0] == "cmd.exe" diff --git a/packages-nextgen/kestrel_core/tests/test_mapping_transformers.py b/packages-nextgen/kestrel_core/tests/test_mapping_transformers.py deleted file mode 100644 index 9e454925..00000000 --- a/packages-nextgen/kestrel_core/tests/test_mapping_transformers.py +++ /dev/null @@ -1,35 +0,0 @@ -import pandas as pd -import pytest - -from kestrel.mapping.transformers import ( - run_transformer, - run_transformer_on_series, -) - - -@pytest.mark.parametrize( - "transform, value, expected", [ - ("dirname", r"C:\Windows\System32\cmd.exe", r"C:\Windows\System32"), - ("basename", r"C:\Windows\System32\cmd.exe", r"cmd.exe"), - ("startswith", r"C:\Windows\System32", r"C:\Windows\System32\%"), - ("endswith", "cmd.exe", r"%\cmd.exe"), - ("to_int", 1234, 1234), - ("to_int", 1234.1234, 1234), # Maybe this should fail? 
- ("to_int", "1234", 1234), - ("to_int", "0x4d2", 1234), - ("to_str", "1234", "1234"), - ("to_str", 1234, "1234"), - ("to_epoch_ms", "2024-03-29T12:57:56.926Z", 1711717076926), - ("to_epoch_ms", "2024-03-29T12:57:56.92Z", 1711717076920), - ("to_epoch_ms", "2024-03-29T12:57:56.9Z", 1711717076900), - ("to_epoch_ms", "2024-03-29T12:57:56Z", 1711717076000), - ] -) -def test_run_transformer(transform, value, expected): - assert run_transformer(transform, value) == expected - - -def test_run_series_basename(): - data = pd.Series([r"C:\Windows\System32\cmd.exe", r"C:\TMP"]) - result = list(run_transformer_on_series("basename", data)) - assert result == ["cmd.exe", "TMP"] diff --git a/packages-nextgen/kestrel_core/tests/test_parser.py b/packages-nextgen/kestrel_core/tests/test_parser.py deleted file mode 100644 index 1ca5d314..00000000 --- a/packages-nextgen/kestrel_core/tests/test_parser.py +++ /dev/null @@ -1,290 +0,0 @@ -import json -import pytest -from collections import Counter -from datetime import datetime, timedelta, timezone - -from kestrel.frontend.parser import parse_kestrel -from kestrel.ir.graph import IRGraph -from kestrel.ir.filter import ReferenceValue -from kestrel.ir.instructions import ( - Construct, - DataSource, - Filter, - Limit, - Offset, - ProjectAttrs, - ProjectEntity, - Reference, - Sort, - Variable, - Explain, - Return, -) - - -@pytest.mark.parametrize( - "stmt", [ - "x = GET thing FROM if://ds WHERE foo = 'bar'", - "x = GET thing FROM if://ds WHERE foo > 1.5", - r"x = GET thing FROM if://ds WHERE foo = r'C:\TMP'", - "x = GET thing FROM if://ds WHERE foo = 'bar' OR baz != 42", - "x = GET thing FROM if://ds WHERE foo = 'bar' AND baz IN (1, 2, 3)", - "x = GET thing FROM if://ds WHERE foo = 'bar' AND baz IN (1)", - "x = GET thing FROM if://ds WHERE foo = 'bar' AND baz IN (1) LAST 3 DAYS", - ] -) -def test_parser_get_statements(stmt): - """ - This test isn't meant to be comprehensive, but checks basic transformer functionality. 
- - This will need to be updated as we build out the new Transformer - """ - - graph = parse_kestrel(stmt) - assert len(graph) == 4 - assert len(graph.get_nodes_by_type(Variable)) == 1 - assert len(graph.get_nodes_by_type(ProjectEntity)) == 1 - assert len(graph.get_nodes_by_type(DataSource)) == 1 - assert len(graph.get_nodes_by_type(Filter)) == 1 - - # Ensure result is serializable - _ = graph.to_json() - - -def test_parser_get_timespan_relative(): - stmt = "x = GET url FROM if://ds WHERE url = 'http://example.com/' LAST 5h" - graph = parse_kestrel(stmt) - filt_list = graph.get_nodes_by_type(Filter) - assert len(filt_list) == 1 - filt = filt_list[0] - delta = filt.timerange.stop - filt.timerange.start - assert delta == timedelta(hours=5) - - -def test_parser_get_timespan_absolute(): - stmt = ("x = GET url FROM if://ds WHERE url = 'http://example.com/'" - " START '2023-11-29T00:00:00Z' STOP '2023-11-29T05:00:00Z'") - graph = parse_kestrel(stmt) - filt_list = graph.get_nodes_by_type(Filter) - assert len(filt_list) == 1 - filt = filt_list[0] - delta = filt.timerange.stop - filt.timerange.start - assert delta == timedelta(hours=5) - assert filt.timerange.start == datetime(2023, 11, 29, 0, 0, tzinfo=timezone.utc) - assert filt.timerange.stop == datetime(2023, 11, 29, 5, 0, tzinfo=timezone.utc) - - -@pytest.mark.parametrize( - "stmt, expected", [ - ("x = GET url FROM if://ds WHERE url = 'http://example.com/' LIMIT 1", 1), - ("x = GET url FROM if://ds WHERE url = 'http://example.com/' LAST 3d LIMIT 2", 2), - (("x = GET url FROM if://ds WHERE url = 'http://example.com/'" - " START '2023-11-29T00:00:00Z' STOP '2023-11-29T05:00:00Z' LIMIT 3"), 3), - ] -) -def test_parser_get_with_limit(stmt, expected): - graph = parse_kestrel(stmt) - limits = graph.get_nodes_by_type(Limit) - assert len(limits) == 1 - limit = limits[0] - assert limit.num == expected - - -def get_parsed_filter_exp(stmt): - parse_tree = parse_kestrel(stmt) - filter_node = parse_tree.get_nodes_by_type(Filter).pop() - return filter_node.exp - - -def test_parser_mapping_single_comparison_to_single_value(): - # test for attributes in the form entity_name:property_name - stmt = "x = GET process FROM if://ds WHERE process:binary_ref.name = 'foo'" - parse_filter = get_parsed_filter_exp(stmt) - assert parse_filter.field == 'file.name' - # test when entity name is not included in the attributes - stmt = "x = GET process FROM if://ds WHERE binary_ref.name = 'foo'" - parse_filter = get_parsed_filter_exp(stmt) - assert parse_filter.field == 'file.name' - - -def test_parser_mapping_single_comparison_to_multiple_values(): - stmt = "x = GET ipv4-addr FROM if://ds WHERE value = '192.168.22.3'" - parse_filter = get_parsed_filter_exp(stmt) - comps = parse_filter.comps - assert isinstance(comps, list) and len(comps) == 4 - fields = [x.field for x in comps] - assert ("dst_endpoint.ip" in fields and "src_endpoint.ip" in fields and - "device.ip" in fields and "endpoint.ip" in fields) - - -def test_parser_mapping_multiple_comparison_to_multiple_values(): - stmt = "x = GET process FROM if://ds WHERE binary_ref.name = 'foo' "\ - "OR name = 'bam' AND parent_ref.name = 'boom'" - parse_filter = get_parsed_filter_exp(stmt) - field1 = parse_filter.lhs.field - assert field1 == 'file.name' - field2 = parse_filter.rhs.lhs.field - assert field2 == 'name' # 'process.name' - field3 = parse_filter.rhs.rhs.field - assert field3 == "parent_process.name" - - -def test_parser_new_json(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": 
"explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -""" - graph = parse_kestrel(stmt) - cs = graph.get_nodes_by_type(Construct) - assert len(cs) == 1 - construct = cs[0] - df = [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] - assert df == construct.data - vs = graph.get_variables() - assert len(vs) == 1 - assert vs[0].name == "proclist" - - -@pytest.mark.parametrize( - "stmt, node_cnt", [ - ("x = y WHERE foo = 'bar'", 3), - ("x = y WHERE foo > 1.5", 3), - (r"x = y WHERE foo = r'C:\TMP'", 3), - ("x = y WHERE foo = 'bar' OR baz != 42", 3), - ("x = y WHERE foo = 'bar' AND baz IN (1, 2, 3)", 3), - ("x = y WHERE foo = 'bar' AND baz IN (1)", 3), - ("x = y WHERE foo = 'bar' SORT BY foo ASC LIMIT 3", 5), - ("x = y WHERE foo = 'bar' SORT BY foo ASC LIMIT 3 OFFSET 9", 6), - ] -) -def test_parser_expression(stmt, node_cnt): - """ - This test isn't meant to be comprehensive, but checks basic transformer functionality. - - This will need to be updated as we build out the new Transformer - """ - - graph = parse_kestrel(stmt) - assert len(graph) == node_cnt - assert len(graph.get_nodes_by_type(Variable)) == 1 - assert len(graph.get_nodes_by_type(Reference)) == 1 - assert len(graph.get_nodes_by_type(Filter)) == 1 - assert len(graph.get_nodes_by_type(Sort)) in (0, 1) - assert len(graph.get_nodes_by_type(Limit)) in (0, 1) - assert len(graph.get_nodes_by_type(Offset)) in (0, 1) - - -def test_three_statements_in_a_line(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name = 'firefox.exe' OR name = 'chrome.exe' -DISP browsers ATTR name, pid -""" - graph = parse_kestrel(stmt) - assert len(graph) == 6 - c = graph.get_nodes_by_type(Construct)[0] - assert {"proclist", "browsers"} == {v.name for v in graph.get_variables()} - proclist = graph.get_variable("proclist") - browsers = graph.get_variable("browsers") - proj = graph.get_nodes_by_type(ProjectAttrs)[0] - assert proj.attrs == ['name', 'pid'] - ft = graph.get_nodes_by_type(Filter)[0] - assert ft.exp.to_dict() == {"lhs": {"field": "name", "op": "=", "value": "firefox.exe"}, "op": "OR", "rhs": {"field": "name", "op": "=", "value": "chrome.exe"}} - ret = graph.get_returns()[0] - assert len(graph.edges) == 5 - assert (c, proclist) in graph.edges - assert (proclist, ft) in graph.edges - assert (ft, browsers) in graph.edges - assert (browsers, proj) in graph.edges - assert (proj, ret) in graph.edges - - -@pytest.mark.parametrize( - "stmt, node_cnt, expected", [ - ("x = y WHERE foo = z.foo", 5, [ReferenceValue("z", "foo")]), - ("x = y WHERE foo > 1.5", 3, []), - ("x = y WHERE foo = 'bar' OR baz = z.baz", 5, [ReferenceValue("z", "baz")]), - ("x = y WHERE (foo = 'bar' OR baz = z.baz) AND (fox = w.fox AND bbb = z.bbb)", 8, [ReferenceValue("z", "baz"), ReferenceValue("w", "fox"), ReferenceValue("z", "bbb")]), - ("x = GET process FROM s://x WHERE foo = z.foo", 6, [ReferenceValue("z", "foo")]), - ("x = GET file FROM s://y WHERE foo > 1.5", 4, []), - ("x = GET file FROM c://x WHERE foo = 'bar' OR baz = z.baz", 6, [ReferenceValue("z", "baz")]), - ("x = GET user FROM s://x WHERE (foo = 'bar' OR baz = z.baz) AND (fox = w.fox AND bbb = z.bbb)", 9, [ReferenceValue("z", "baz"), ReferenceValue("w", "fox"), ReferenceValue("z", "bbb")]), - ] -) 
-def test_reference_branch(stmt, node_cnt, expected): - graph = parse_kestrel(stmt) - assert len(graph) == node_cnt - filter_nodes = graph.get_nodes_by_type(Filter) - assert len(filter_nodes) == 1 - filter_node = filter_nodes[0] - for rv in expected: - r = graph.get_reference(rv.reference) - assert r - projs = [p for p in graph.successors(r) if isinstance(p, ProjectAttrs) and p.attrs == [rv.attribute]] - assert projs and len(projs) == 1 - proj = projs[0] - assert proj - assert list(graph.successors(proj)) == [filter_node] - - -def test_parser_disp_after_new(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -DISP proclist ATTR name, pid LIMIT 2 OFFSET 3 -""" - graph = parse_kestrel(stmt) - assert len(graph) == 6 - c = graph.get_nodes_by_type(Construct)[0] - assert {"proclist"} == {v.name for v in graph.get_variables()} - proclist = graph.get_variable("proclist") - proj = graph.get_nodes_by_type(ProjectAttrs)[0] - assert proj.attrs == ['name', 'pid'] - limit = graph.get_nodes_by_type(Limit)[0] - assert limit.num == 2 - offset = graph.get_nodes_by_type(Offset)[0] - assert offset.num == 3 - ret = graph.get_returns()[0] - assert len(graph.edges) == 5 - assert (c, proclist) in graph.edges - assert (proclist, proj) in graph.edges - assert (proj, limit) in graph.edges - assert (limit, offset) in graph.edges - assert (offset, ret) in graph.edges - - -def test_parser_explain_alone(): - stmt = "EXPLAIN abc" - graph = parse_kestrel(stmt) - assert len(graph) == 3 - assert len(graph.edges) == 2 - assert Counter(map(type, graph.nodes())) == Counter([Reference, Explain, Return]) - - -def test_parser_explain_dereferred(): - stmt = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -EXPLAIN proclist -""" - graph = parse_kestrel(stmt) - assert len(graph) == 4 - assert len(graph.edges) == 3 - assert Counter(map(type, graph.nodes())) == Counter([Construct, Variable, Explain, Return]) diff --git a/packages-nextgen/kestrel_core/tests/test_session.py b/packages-nextgen/kestrel_core/tests/test_session.py deleted file mode 100644 index 115154d4..00000000 --- a/packages-nextgen/kestrel_core/tests/test_session.py +++ /dev/null @@ -1,186 +0,0 @@ -import pytest -import os -from kestrel import Session -from pandas import DataFrame -from uuid import uuid4 - -from kestrel.display import GraphExplanation -from kestrel.ir.instructions import Construct -from kestrel.config.internal import CACHE_INTERFACE_IDENTIFIER -from kestrel.frontend.parser import parse_kestrel -from kestrel.cache import SqliteCache - - -def test_execute_in_cache(): - hf = """ -proclist = NEW process [ {"name": "cmd.exe", "pid": 123} - , {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ] -browsers = proclist WHERE name != "cmd.exe" -DISP browsers -cmd = proclist WHERE name = "cmd.exe" -DISP cmd ATTR pid -""" - b1 = DataFrame([ {"name": "explorer.exe", "pid": 99} - , {"name": "firefox.exe", "pid": 201} - , {"name": "chrome.exe", "pid": 205} - ]) - b2 = DataFrame([ {"pid": 123} ]) - with Session() as session: - res = session.execute_to_generate(hf) - assert b1.equals(next(res)) - assert b2.equals(next(res)) - with pytest.raises(StopIteration): - next(res) - - -def test_double_deref_in_cache(): - # When the Filter 
node is dereferenced twice,
-    # the node should be deepcopied each time to avoid issues
-    hf = """
-proclist = NEW process [ {"name": "cmd.exe", "pid": 123}
-                       , {"name": "explorer.exe", "pid": 99}
-                       , {"name": "firefox.exe", "pid": 201}
-                       , {"name": "chrome.exe", "pid": 205}
-                       ]
-px = proclist WHERE name != "cmd.exe" AND pid = 205
-chrome = proclist WHERE pid IN px.pid
-DISP chrome
-DISP chrome
-"""
-    df = DataFrame([ {"name": "chrome.exe", "pid": 205} ])
-    with Session() as session:
-        res = session.execute_to_generate(hf)
-        assert df.equals(next(res))
-        assert df.equals(next(res))
-        with pytest.raises(StopIteration):
-            next(res)
-
-
-def test_explain_in_cache():
-    hf = """
-proclist = NEW process [ {"name": "cmd.exe", "pid": 123}
-                       , {"name": "explorer.exe", "pid": 99}
-                       , {"name": "firefox.exe", "pid": 201}
-                       , {"name": "chrome.exe", "pid": 205}
-                       ]
-browsers = proclist WHERE name != "cmd.exe"
-chrome = browsers WHERE pid = 205
-EXPLAIN chrome
-"""
-    with Session() as session:
-        ress = session.execute_to_generate(hf)
-        res = next(ress)
-        assert isinstance(res, GraphExplanation)
-        assert len(res.graphlets) == 1
-        ge = res.graphlets[0]
-        assert ge.graph == session.irgraph.to_dict()
-        construct = session.irgraph.get_nodes_by_type(Construct)[0]
-        assert ge.query.language == "SQL"
-        stmt = ge.query.statement.replace('"', '')
-        assert stmt == f'SELECT * \nFROM (SELECT * \nFROM (SELECT * \nFROM (SELECT * \nFROM {construct.id.hex}v) AS proclist \nWHERE name != \'cmd.exe\') AS browsers \nWHERE pid = 205) AS chrome'
-        with pytest.raises(StopIteration):
-            next(ress)
-
-
-def test_multi_interface_explain():
-
-    class DataLake(SqliteCache):
-        @staticmethod
-        def schemes():
-            return ["datalake"]
-
-    class Gateway(SqliteCache):
-        @staticmethod
-        def schemes():
-            return ["gateway"]
-
-    extra_db = []
-    with Session() as session:
-        stmt1 = """
-procs = NEW process [ {"name": "cmd.exe", "pid": 123}
-                    , {"name": "explorer.exe", "pid": 99}
-                    , {"name": "firefox.exe", "pid": 201}
-                    , {"name": "chrome.exe", "pid": 205}
-                    ]
-DISP procs
-"""
-        session.execute(stmt1)
-        session.interface_manager[CACHE_INTERFACE_IDENTIFIER].__class__ = DataLake
-        session.irgraph.get_nodes_by_type_and_attributes(Construct, {"interface": CACHE_INTERFACE_IDENTIFIER})[0].interface = "datalake"
-
-        new_cache = SqliteCache(session_id = uuid4())
-        extra_db.append(new_cache.db_path)
-        session.interface_manager.interfaces.append(new_cache)
-        stmt2 = """
-nt = NEW network [ {"pid": 123, "source": "192.168.1.1", "destination": "1.1.1.1"}
-                 , {"pid": 205, "source": "192.168.1.1", "destination": "1.1.1.2"}
-                 ]
-DISP nt
-"""
-        session.execute(stmt2)
-        session.interface_manager[CACHE_INTERFACE_IDENTIFIER].__class__ = Gateway
-        session.irgraph.get_nodes_by_type_and_attributes(Construct, {"interface": CACHE_INTERFACE_IDENTIFIER})[0].interface = "gateway"
-
-        new_cache = SqliteCache(session_id = uuid4())
-        extra_db.append(new_cache.db_path)
-        session.interface_manager.interfaces.append(new_cache)
-        stmt3 = """
-domain = NEW domain [ {"ip": "1.1.1.1", "domain": "cloudflare.com"}
-                    , {"ip": "1.1.1.2", "domain": "xyz.cloudflare.com"}
-                    ]
-DISP domain
-"""
-        session.execute(stmt3)
-
-        stmt = """
-p2 = procs WHERE name IN ("firefox.exe", "chrome.exe")
-ntx = nt WHERE pid IN p2.pid
-d2 = domain WHERE ip IN ntx.destination
-EXPLAIN d2
-DISP d2
-"""
-        ress = session.execute_to_generate(stmt)
-        disp = next(ress)
-        df_res = next(ress)
-
-        with pytest.raises(StopIteration):
-            next(ress)
-
-        assert isinstance(disp, GraphExplanation)
-        assert len(disp.graphlets) == 
4 - - assert len(disp.graphlets[0].graph["nodes"]) == 5 - query = disp.graphlets[0].query.statement.replace('"', '') - procs = session.irgraph.get_variable("procs") - c1 = next(session.irgraph.predecessors(procs)) - assert query == f"SELECT pid \nFROM (SELECT * \nFROM (SELECT * \nFROM {c1.id.hex}) AS procs \nWHERE name IN ('firefox.exe', 'chrome.exe')) AS p2" - - assert len(disp.graphlets[1].graph["nodes"]) == 2 - query = disp.graphlets[1].query.statement.replace('"', '') - nt = session.irgraph.get_variable("nt") - c2 = next(session.irgraph.predecessors(nt)) - assert query == f"SELECT * \nFROM (SELECT * \nFROM {c2.id.hex}) AS nt" - - # the current session.execute_to_generate() logic does not store - # in cache if evaluated by cache; the behavior may change in the future - assert len(disp.graphlets[2].graph["nodes"]) == 2 - query = disp.graphlets[2].query.statement.replace('"', '') - domain = session.irgraph.get_variable("domain") - c3 = next(session.irgraph.predecessors(domain)) - assert query == f"SELECT * \nFROM (SELECT * \nFROM {c3.id.hex}) AS domain" - - assert len(disp.graphlets[3].graph["nodes"]) == 12 - print(disp.graphlets[3].graph["nodes"]) - query = disp.graphlets[3].query.statement.replace('"', '') - p2 = session.irgraph.get_variable("p2") - p2pa = next(session.irgraph.successors(p2)) - assert query == f"SELECT * \nFROM (SELECT * \nFROM (SELECT * \nFROM {c3.id.hex}) AS domain \nWHERE ip IN (SELECT destination \nFROM (SELECT * \nFROM {nt.id.hex}v \nWHERE pid IN (SELECT * \nFROM {p2pa.id.hex}v)) AS ntx)) AS d2" - - df_ref = DataFrame([{"ip": "1.1.1.2", "domain": "xyz.cloudflare.com"}]) - assert df_ref.equals(df_res) - - for db_file in extra_db: - os.remove(db_file) diff --git a/packages-nextgen/kestrel_interface_opensearch/pyproject.toml b/packages-nextgen/kestrel_interface_opensearch/pyproject.toml deleted file mode 100644 index 6270f6d0..00000000 --- a/packages-nextgen/kestrel_interface_opensearch/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["setuptools >= 68.2.2", "wheel"] -build-backend = "setuptools.build_meta" - -[project] -name = "kestrel_interface_opensearch" -version = "2.0.0" -description = "Kestrel OpenSearch Datasource Interface" -readme = "README.rst" -requires-python = ">=3.8" -license = {text = "Apache 2.0 License"} -maintainers = [ - {name = "Xiaokui Shu", email = "xiaokui.shu@ibm.com"}, - {name = "Paul Coccoli", email = "pcoccoli@us.ibm.com"}, -] -keywords = [ - "kestrel", - "cybersecurity", - "threat hunting", -] -classifiers = [ - "Topic :: Security", - "Operating System :: OS Independent", - "Development Status :: 4 - Beta", - "Programming Language :: Python :: 3", -] - -dependencies = [ - "kestrel_core>=2.0.0", - "opensearch-py>=2.4.2", -] - -[project.urls] -Homepage = "https://github.com/opencybersecurityalliance/kestrel-lang" -Documentation = "https://kestrel.readthedocs.io/" -Repository = "https://github.com/opencybersecurityalliance/kestrel-lang.git" diff --git a/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/__init__.py b/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/__init__.py deleted file mode 100644 index 3ee389ca..00000000 --- a/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from kestrel_interface_opensearch.interface import OpenSearchInterface diff --git a/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/config.py 
b/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/config.py deleted file mode 100644 index 26d02ccf..00000000 --- a/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/config.py +++ /dev/null @@ -1,69 +0,0 @@ -import logging -from dataclasses import dataclass, field -from typing import Dict, Mapping, Optional - -import yaml -from mashumaro.mixins.json import DataClassJSONMixin - -from kestrel.config.utils import ( - CONFIG_DIR_DEFAULT, - load_user_config, -) -from kestrel.exceptions import InterfaceNotConfigured -from kestrel.mapping.data_model import load_default_mapping - - -PROFILE_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "opensearch.yaml" -PROFILE_PATH_ENV_VAR = "KESTREL_OPENSEARCH_CONFIG" - -_logger = logging.getLogger(__name__) - - -@dataclass -class Auth: - username: str - password: str - - -@dataclass -class Connection(DataClassJSONMixin): - url: str - auth: Auth - verify_certs: bool = True - - def __post_init__(self): - self.auth = Auth(**self.auth) - - -@dataclass -class Index(DataClassJSONMixin): - connection: str - timestamp: str - timestamp_format: str - data_model_mapping: Optional[str] = None # Filename for mapping - data_model_map: Mapping = field(default_factory=dict) - - def __post_init__(self): - if self.data_model_mapping: - with open(self.data_model_mapping, "r") as fp: - self.data_model_map = yaml.safe_load(fp) - else: - # Default to the built-in ECS mapping - self.data_model_map = load_default_mapping("ecs") - - -@dataclass -class Config(DataClassJSONMixin): - connections: Dict[str, Connection] - indexes: Dict[str, Index] - - def __post_init__(self): - self.connections = {k: Connection(**v) for k, v in self.connections.items()} - self.indexes = {k: Index(**v) for k, v in self.indexes.items()} - - -def load_config(): - try: - return Config(**load_user_config(PROFILE_PATH_ENV_VAR, PROFILE_PATH_DEFAULT)) - except TypeError: - raise InterfaceNotConfigured() diff --git a/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/interface.py b/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/interface.py deleted file mode 100644 index 8c70eb95..00000000 --- a/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/interface.py +++ /dev/null @@ -1,211 +0,0 @@ -import logging -from typing import Iterable, Mapping, Optional -from uuid import UUID - -from opensearchpy import OpenSearch -from pandas import DataFrame, Series, concat - -from kestrel.display import GraphletExplanation -from kestrel.exceptions import DataSourceError -from kestrel.interface import AbstractInterface -from kestrel.ir.graph import IRGraphEvaluable -from kestrel.ir.instructions import ( - DataSource, - Instruction, - Return, - Variable, - Filter, - SourceInstruction, - TransformingInstruction, - SolePredecessorTransformingInstruction, -) -from kestrel.mapping.data_model import translate_dataframe - -from kestrel_interface_opensearch.config import load_config -from kestrel_interface_opensearch.ossql import OpenSearchTranslator - - -_logger = logging.getLogger(__name__) - - -def _jdbc2df(schema: dict, datarows: dict) -> DataFrame: - """Convert a JDBC query result response to a DataFrame""" - columns = [c.get("alias", c["name"]) for c in schema] - return DataFrame(datarows, columns=columns) - - -def read_sql(sql: str, conn: OpenSearch, dmm: Optional[dict] = None) -> DataFrame: - """Execute `sql` and return the results as a DataFrame, a la pandas.read_sql""" - # 
https://opensearch.org/docs/latest/search-plugins/sql/sql-ppl-api/#query-api - body = { - # Temporarily comment out fetch_size due to https://github.com/opensearch-project/sql/issues/2579 - # FIXME: "fetch_size": 10000, # Should we make this configurable? - "query": sql, - } - query_resp = conn.http.post("/_plugins/_sql?format=jdbc", body=body) - status = query_resp.get("status", 500) - if status != 200: - raise DataSourceError(f"OpenSearch query returned {status}") - _logger.debug( - "total=%d size=%d rows=%d", - query_resp["total"], - query_resp["size"], - len(query_resp["datarows"]), - ) - - # Only the first page contains the schema - # https://opensearch.org/docs/latest/search-plugins/sql/sql-ppl-api/#paginating-results - schema = query_resp["schema"] - dfs = [] - done = False - while not done: - df = _jdbc2df(schema, query_resp["datarows"]) - if dmm is not None: - # Need to use Data Model Map to do results translation - dfs.append(translate_dataframe(df, dmm)) - else: - dfs.append(df) - cursor = query_resp.get("cursor") - if not cursor: - break - query_resp = conn.http.post( - "/_plugins/_sql?format=jdbc", body={"cursor": cursor} - ) - - # Merge all pages together - return concat(dfs) - - -class OpenSearchInterface(AbstractInterface): - def __init__( - self, - serialized_cache_catalog: Optional[str] = None, - session_id: Optional[UUID] = None, - ): - super().__init__(serialized_cache_catalog, session_id) - self.config = load_config() - self.schemas: dict = {} # Schema per table (index) - self.conns: dict = {} # Map of conn name -> connection - for info in self.config.indexes.values(): - name = info.connection - if name not in self.conns: - conn = self.config.connections[name] - client = OpenSearch( - [conn.url], - http_auth=(conn.auth.username, conn.auth.password), - verify_certs=conn.verify_certs, - ) - self.conns[name] = client - - @staticmethod - def schemes() -> Iterable[str]: - return ["opensearch"] - - def store( - self, - instruction_id: UUID, - data: DataFrame, - ): - raise NotImplementedError("OpenSearchInterface.store") # TEMP - - def evaluate_graph( - self, - graph: IRGraphEvaluable, - instructions_to_evaluate: Optional[Iterable[Instruction]] = None, - ) -> Mapping[UUID, DataFrame]: - mapping = {} - if not instructions_to_evaluate: - instructions_to_evaluate = graph.get_sink_nodes() - for instruction in instructions_to_evaluate: - translator = self._evaluate_instruction_in_graph(graph, instruction) - # TODO: may catch error in case evaluation starts from incomplete SQL - sql = translator.result() - _logger.debug("SQL query generated: %s", sql) - ds = self.config.indexes[translator.table] # table == datasource - conn = self.config.connections[ds.connection] - client = OpenSearch( - [conn.url], - http_auth=(conn.auth.username, conn.auth.password), - verify_certs=conn.verify_certs, - ) - mapping[instruction.id] = read_sql( - sql, client, translator.from_ocsf_map[translator.entity] - ) - client.close() - return mapping - - def explain_graph( - self, - graph: IRGraphEvaluable, - instructions_to_explain: Optional[Iterable[Instruction]] = None, - ) -> Mapping[UUID, GraphletExplanation]: - mapping = {} - if not instructions_to_explain: - instructions_to_explain = graph.get_sink_nodes() - for instruction in instructions_to_explain: - translator = self._evaluate_instruction_in_graph(graph, instruction) - dep_graph = graph.duplicate_dependent_subgraph_of_node(instruction) - graph_dict = dep_graph.to_dict() - query_stmt = translator.result() - mapping[instruction.id] = 
GraphletExplanation(graph_dict, query_stmt) - return mapping - - def _evaluate_instruction_in_graph( - self, - graph: IRGraphEvaluable, - instruction: Instruction, - ) -> OpenSearchTranslator: - _logger.debug("instruction: %s", str(instruction)) - translator = None - if isinstance(instruction, TransformingInstruction): - trunk, _r2n = graph.get_trunk_n_branches(instruction) - translator = self._evaluate_instruction_in_graph(graph, trunk) - - if isinstance(instruction, SolePredecessorTransformingInstruction): - if isinstance(instruction, Return): - pass - elif isinstance(instruction, Variable): - pass - else: - translator.add_instruction(instruction) - - elif isinstance(instruction, Filter): - translator.add_instruction(instruction) - - else: - raise NotImplementedError(f"Unknown instruction type: {instruction}") - - elif isinstance(instruction, SourceInstruction): - if isinstance(instruction, DataSource): - ds = self.config.indexes[instruction.datasource] - schema = self.get_schema(instruction.datasource) - translator = OpenSearchTranslator( - ds.timestamp_format, - ds.timestamp, - instruction.datasource, - ds.data_model_map, - schema, - ) - else: - raise NotImplementedError(f"Unhandled instruction type: {instruction}") - - return translator - - def _get_client_for_index(self, index: str) -> OpenSearch: - conn = self.config.indexes[index].connection - _logger.debug( - "Fetching schema for %s from %s", index, self.config.connections[conn].url - ) - return self.conns[conn] - - def get_schema(self, index: str) -> dict: - client = self._get_client_for_index(index) - if index not in self.schemas: - df = read_sql(f"DESCRIBE TABLES LIKE {index}", client) - self.schemas[index] = ( - df[["TYPE_NAME", "COLUMN_NAME"]] - .set_index("COLUMN_NAME") - .T.to_dict("records")[0] - ) - _logger.debug("%s schema:\n%s", index, self.schemas[index]) - return self.schemas[index] diff --git a/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/ossql.py b/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/ossql.py deleted file mode 100644 index 018cd4c8..00000000 --- a/packages-nextgen/kestrel_interface_opensearch/src/kestrel_interface_opensearch/ossql.py +++ /dev/null @@ -1,249 +0,0 @@ -import logging -from functools import reduce -from typing import Optional, Union - -from typeguard import typechecked - -from kestrel.exceptions import UnsupportedOperatorError -from kestrel.ir.filter import ( - BoolExp, - ExpOp, - FComparison, - ListOp, - MultiComp, - NumCompOp, - StrComparison, - StrCompOp, -) -from kestrel.ir.instructions import ( - Filter, - Instruction, - Limit, - Offset, - ProjectAttrs, - ProjectEntity, - Sort, - SortDirection, -) -from kestrel.mapping.data_model import ( - translate_comparison_to_native, - translate_projection_to_native, -) - - -_logger = logging.getLogger(__name__) - - -Value = Union[ - int, - float, - str, - list, -] - - -@typechecked -def _and(lhs: str, rhs: Value) -> str: - return " AND ".join((lhs, rhs)) - - -@typechecked -def _or(lhs: str, rhs: Value) -> str: - return " OR ".join((lhs, rhs)) - - -# SQL comparison operator functions -comp2func = { - NumCompOp.EQ: "=", - NumCompOp.NEQ: "<>", - NumCompOp.LT: "<", - NumCompOp.LE: "<=", - NumCompOp.GT: ">", - NumCompOp.GE: ">=", - StrCompOp.EQ: "=", - StrCompOp.NEQ: "<>", - StrCompOp.LIKE: "LIKE", - StrCompOp.NLIKE: "NOT LIKE", - # UNSUPPORTED BY OpenSearch SQL: StrCompOp.MATCHES: "REGEXP", - # UNSUPPORTED BY OpenSearch SQL: StrCompOp.NMATCHES: "NOT REGEXP", - ListOp.IN: "IN", - 
ListOp.NIN: "NOT IN", -} - - -def _format_value(value): - if isinstance(value, str): - # Need to quote string values - value = f"'{value}'" - elif isinstance(value, list): - # SQL uses parens for lists - value = tuple(value) - return value - - -@typechecked -class OpenSearchTranslator: - def __init__( - self, - timefmt: str, - timestamp: str, - select_from: str, - data_model_map: dict, - schema: dict, - ): - # Time format string for datasource - self.timefmt = timefmt - - # Primary timestamp field in target table - self.timestamp = timestamp - - # Query clauses - self.table: str = select_from - self.filt: Optional[Filter] = None - self.entity: Optional[str] = None - self.project: Optional[ProjectAttrs] = None - self.limit: int = 0 - self.offset: int = 0 - self.order_by: str = "" - self.sort_dir = SortDirection.DESC - - # Data model mapping: should be ocsf -> native - self.from_ocsf_map = data_model_map - - # Index "schema" (field name -> type) - self.schema = schema - - @typechecked - def _render_comp(self, comp: FComparison) -> str: - prefix = ( - f"{self.entity}." if (self.entity and comp.field != self.timestamp) else "" - ) - ocsf_field = f"{prefix}{comp.field}" - comps = translate_comparison_to_native( - self.from_ocsf_map, ocsf_field, comp.op, comp.value - ) - try: - comps = [f"{f} {comp2func[o]} {_format_value(v)}" for f, o, v in comps] - conj = " OR ".join(comps) - result = conj if len(comps) == 1 else f"({conj})" - except KeyError: - raise UnsupportedOperatorError( - comp.op.value - ) # FIXME: need to report the mapped op, not the original - return result - - @typechecked - def _render_multi_comp(self, comps: MultiComp) -> str: - op = _and if comps.op == ExpOp.AND else _or - return reduce(op, map(self._render_comp, comps.comps)) - - @typechecked - def _render_exp(self, exp: BoolExp) -> str: - if isinstance(exp.lhs, BoolExp): - lhs = self._render_exp(exp.lhs) - elif isinstance(exp.lhs, MultiComp): - lhs = self._render_multi_comp(exp.lhs) - else: - lhs = self._render_comp(exp.lhs) - if isinstance(exp.rhs, BoolExp): - rhs = self._render_exp(exp.rhs) - elif isinstance(exp.rhs, MultiComp): - rhs = self._render_multi_comp(exp.rhs) - else: - rhs = self._render_comp(exp.rhs) - return _and(lhs, rhs) if exp.op == ExpOp.AND else _or(lhs, rhs) - - @typechecked - def _render_filter(self) -> Optional[str]: - if not self.filt: - return None - if self.filt.timerange.start: - # Convert the timerange to the appropriate pair of comparisons - start_comp = StrComparison( - self.timestamp, ">=", self.filt.timerange.start.strftime(self.timefmt) - ) - stop_comp = StrComparison( - self.timestamp, "<", self.filt.timerange.stop.strftime(self.timefmt) - ) - # AND them together - time_exp = BoolExp(start_comp, ExpOp.AND, stop_comp) - # AND that with any existing filter expression - exp = BoolExp(self.filt.exp, ExpOp.AND, time_exp) - else: - exp = self.filt.exp - if isinstance(exp, BoolExp): - comp = self._render_exp(exp) - elif isinstance(exp, MultiComp): - comp = self._render_multi_comp(exp) - else: - comp = self._render_comp(exp) - return comp - - def add_Filter(self, filt: Filter) -> None: - # Just save filter and compile it later - # Probably need the entity projection set first - self.filt = filt - - def add_ProjectAttrs(self, proj: ProjectAttrs) -> None: - # Just save projection and compile it later - self.project = proj - - def _render_proj(self): - """Get a list of native cols to project with their OCSF equivalents as SQL aliases""" - projection = self.project.attrs if self.project else None - 
name_pairs = translate_projection_to_native( - self.from_ocsf_map, self.entity, projection - ) - proj = [ - f"`{k}` AS `{v}`" if k != v else f"`{k}`" - for k, v in name_pairs - if k in self.schema # Ignore mapped attrs the index doesn't have - ] - if not proj: - # If this is still empty, then the attr projection must be for attrs "outside" to entity projection? - proj = [f"`{attr}`" for attr in self.project.attrs] - _logger.debug("Set projection to %s", proj) - return proj - - def add_ProjectEntity(self, proj: ProjectEntity) -> None: - self.entity = proj.entity_type - _logger.debug("Set base entity to '%s'", self.entity) - - def add_Limit(self, lim: Limit) -> None: - self.limit = lim.num - - def add_Offset(self, offset: Offset) -> None: - self.offset = offset.num - - def add_Sort(self, sort: Sort) -> None: - self.order_by = sort.attribute - self.sort_dir = sort.direction - - def add_instruction(self, i: Instruction) -> None: - inst_name = i.instruction - method_name = f"add_{inst_name}" - try: - method = getattr(self, method_name) - except AttributeError as e: - raise NotImplementedError(f"OpenSearchTranslator.{method_name}") - method(i) - - def result(self) -> str: - stages = ["SELECT"] - cols = ", ".join(self._render_proj()) - stages.append(f"{cols}") - stages.append(f"FROM {self.table}") - where = self._render_filter() - if where: - stages.append(f"WHERE {where}") - if self.order_by: - stages.append(f"ORDER BY {self.order_by} {self.sort_dir.value}") - if self.limit: - # https://opensearch.org/docs/latest/search-plugins/sql/sql/basic/#limit - if self.offset: - stages.append(f"LIMIT {self.offset}, {self.limit}") - else: - stages.append(f"LIMIT {self.limit}") - sql = " ".join(stages) - _logger.debug("SQL: %s", sql) - return sql diff --git a/packages-nextgen/kestrel_interface_opensearch/tests/__init__.py b/packages-nextgen/kestrel_interface_opensearch/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_interface_opensearch/tests/test_config.py b/packages-nextgen/kestrel_interface_opensearch/tests/test_config.py deleted file mode 100644 index 85241b71..00000000 --- a/packages-nextgen/kestrel_interface_opensearch/tests/test_config.py +++ /dev/null @@ -1,52 +0,0 @@ -import os - -import yaml - -from kestrel_interface_opensearch.config import ( - PROFILE_PATH_ENV_VAR, - Connection, - load_config, -) - - -def test_load_config(tmp_path): - config = { - "connections": { - "localhost": { - "url": "https://localhost:9200", - "verify_certs": False, - "auth": { - "username": "admin", - "password": "admin", - } - }, - "some-cloud-thing": { - "url": "https://www.example.com:9200", - "verify_certs": True, - "auth": { - "username": "hunter", - "password": "super_secret", - } - } - }, - "indexes": { - "some_index": { - "connection": "some-cloud-thing", - "timestamp": "@timestamp", - "timestamp_format": "%Y-%m-%d %H:%M:%S.%f", - "data_model_mapping": str(tmp_path / "mapping.yaml") - } - } - } - map_file = tmp_path / "mapping.yaml" - with open(map_file, 'w') as fp: - fp.write("some.field: other.field\n") - config_file = tmp_path / "opensearch.yaml" - with open(config_file, 'w') as fp: - yaml.dump(config, fp) - os.environ[PROFILE_PATH_ENV_VAR] = str(config_file) - read_config = load_config() - conn: Connection = read_config.connections["localhost"] - assert conn.url == config["connections"]["localhost"]["url"] - assert read_config.connections["localhost"].url == config["connections"]["localhost"]["url"] - assert read_config.indexes["some_index"].timestamp 
== config["indexes"]["some_index"]["timestamp"] diff --git a/packages-nextgen/kestrel_interface_opensearch/tests/test_ossql.py b/packages-nextgen/kestrel_interface_opensearch/tests/test_ossql.py deleted file mode 100644 index 838b57e2..00000000 --- a/packages-nextgen/kestrel_interface_opensearch/tests/test_ossql.py +++ /dev/null @@ -1,127 +0,0 @@ -from datetime import datetime -from dateutil import parser - -from kestrel_interface_opensearch.ossql import OpenSearchTranslator -from kestrel.exceptions import UnsupportedOperatorError -from kestrel.ir.filter import ( - BoolExp, - ExpOp, - FComparison, - IntComparison, - ListOp, - ListComparison, - MultiComp, - NumCompOp, - StrCompOp, - StrComparison, - TimeRange, -) -from kestrel.ir.instructions import ( - DataSource, - Filter, - Limit, - Offset, - ProjectAttrs, - ProjectEntity, - Sort, - SortDirection, -) - -import pytest - - -TIMEFMT = '%Y-%m-%dT%H:%M:%S.%fZ' - - -# A much-simplified test mapping -data_model_map = { - "process": { - "cmd_line": "CommandLine", - "file": { - "path": "Image", - # "name": [ - # { - # "native_field": "Image", - # "native_value": "basename", - # "ocsf_op": "LIKE", - # "ocsf_value": "endswith" - # } - # ] - }, - "pid": "ProcessId", - "parent_process": { - "pid": "ParentProcessId", - }, - }, -} - -schema = { - "CommandLine": "text", - "Image": "text", - "ProcessId": "text", - "ParentProcessId": "text", -} - - -def _dt(timestr: str) -> datetime: - return parser.parse(timestr) - - -def _remove_nl(s): - return s.replace('\n', '') - - -@pytest.mark.parametrize( - "iseq, sql", [ - # Try a simple filter - ([Filter(IntComparison('foo', NumCompOp.GE, 0))], - "SELECT {} FROM my_table WHERE foo >= 0"), - # Try a simple filter with sorting - ([Filter(IntComparison('foo', NumCompOp.GE, 0)), Sort('bar')], - "SELECT {} FROM my_table WHERE foo >= 0 ORDER BY bar DESC"), - # Simple filter plus time range - ([Filter(IntComparison('foo', NumCompOp.GE, 0), timerange=TimeRange(_dt('2023-12-06T08:17:00Z'), _dt('2023-12-07T08:17:00Z')))], - "SELECT {} FROM my_table WHERE foo >= 0 AND timestamp >= '2023-12-06T08:17:00.000000Z' AND timestamp < '2023-12-07T08:17:00.000000Z'"), - # Add a limit and projection - ([Limit(3), ProjectAttrs(['foo', 'bar', 'baz']), Filter(StrComparison('foo', StrCompOp.EQ, 'abc'))], - "SELECT `foo`, `bar`, `baz` FROM my_table WHERE foo = 'abc' LIMIT 3"), - # Same as above but reverse order - ([Filter(StrComparison('foo', StrCompOp.EQ, 'abc')), ProjectAttrs(['foo', 'bar', 'baz']), Limit(3)], - "SELECT `foo`, `bar`, `baz` FROM my_table WHERE foo = 'abc' LIMIT 3"), - ([Filter(ListComparison('foo', ListOp.NIN, ['abc', 'def']))], - "SELECT {} FROM my_table WHERE foo NOT IN ('abc', 'def')"), - ([Filter(MultiComp(ExpOp.OR, [IntComparison('foo', NumCompOp.EQ, 1), IntComparison('bar', NumCompOp.EQ, 1)]))], - "SELECT {} FROM my_table WHERE foo = 1 OR bar = 1"), - ([Filter(MultiComp(ExpOp.AND, [IntComparison('foo', NumCompOp.EQ, 1), IntComparison('bar', NumCompOp.EQ, 1)]))], - "SELECT {} FROM my_table WHERE foo = 1 AND bar = 1"), - ([Limit(1000), Offset(2000)], - "SELECT {} FROM my_table LIMIT 2000, 1000"), - # Test entity projection - ([Limit(3), Filter(StrComparison('cmd_line', StrCompOp.EQ, 'foo bar')), ProjectEntity('process')], - "SELECT {} FROM my_table WHERE CommandLine = 'foo bar' LIMIT 3"), - ] -) -def test_opensearch_translator(iseq, sql): - cols = '`CommandLine` AS `cmd_line`, `Image` AS `file.path`, `ProcessId` AS `pid`, `ParentProcessId` AS `parent_process.pid`' - if ProjectEntity in {type(i) for i in iseq}: - 
cols = '`CommandLine` AS `cmd_line`, `Image` AS `file.path`, `ProcessId` AS `pid`, `ParentProcessId` AS `parent_process.pid`' - else: - cols = '`CommandLine` AS `process.cmd_line`, `Image` AS `process.file.path`, `ProcessId` AS `process.pid`, `ParentProcessId` AS `process.parent_process.pid`' - trans = OpenSearchTranslator(TIMEFMT, "timestamp", "my_table", data_model_map, schema) - for i in iseq: - trans.add_instruction(i) - result = trans.result() - assert _remove_nl(str(result)) == sql.format(cols) - - -@pytest.mark.parametrize( - "instruction", [ - Filter(StrComparison('foo', StrCompOp.MATCHES, '.*abc.*')), - Filter(StrComparison('foo', StrCompOp.NMATCHES, '.*abc.*')), - ] -) -def test_opensearch_translator_unsupported(instruction): - trans = OpenSearchTranslator(TIMEFMT, "timestamp", "my_table", data_model_map, schema) - with pytest.raises(UnsupportedOperatorError): - trans.add_instruction(instruction) - _ = trans.result() diff --git a/packages-nextgen/kestrel_interface_sqlalchemy/pyproject.toml b/packages-nextgen/kestrel_interface_sqlalchemy/pyproject.toml deleted file mode 100644 index c4309e70..00000000 --- a/packages-nextgen/kestrel_interface_sqlalchemy/pyproject.toml +++ /dev/null @@ -1,35 +0,0 @@ -[build-system] -requires = ["setuptools >= 68.2.2", "wheel"] -build-backend = "setuptools.build_meta" - -[project] -name = "kestrel_interface_sqlalchemy" -version = "2.0.0" -description = "Kestrel SQLAlchemy Datasource Interface" -readme = "README.rst" -requires-python = ">=3.8" -license = {text = "Apache 2.0 License"} -maintainers = [ - {name = "Xiaokui Shu", email = "xiaokui.shu@ibm.com"}, - {name = "Paul Coccoli", email = "pcoccoli@us.ibm.com"}, -] -keywords = [ - "kestrel", - "cybersecurity", - "threat hunting", -] -classifiers = [ - "Topic :: Security", - "Operating System :: OS Independent", - "Development Status :: 4 - Beta", - "Programming Language :: Python :: 3", -] - -dependencies = [ - "kestrel_core>=2.0.0", -] - -[project.urls] -Homepage = "https://github.com/opencybersecurityalliance/kestrel-lang" -Documentation = "https://kestrel.readthedocs.io/" -Repository = "https://github.com/opencybersecurityalliance/kestrel-lang.git" diff --git a/packages-nextgen/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/__init__.py b/packages-nextgen/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/__init__.py deleted file mode 100644 index 781df021..00000000 --- a/packages-nextgen/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from kestrel_interface_sqlalchemy.interface import SQLAlchemyInterface diff --git a/packages-nextgen/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/config.py b/packages-nextgen/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/config.py deleted file mode 100644 index e9d148e4..00000000 --- a/packages-nextgen/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/config.py +++ /dev/null @@ -1,58 +0,0 @@ -import logging -from dataclasses import dataclass, field -from typing import Dict, Mapping, Optional - -import yaml -from mashumaro.mixins.json import DataClassJSONMixin - -from kestrel.config.utils import ( - CONFIG_DIR_DEFAULT, - load_user_config, -) -from kestrel.exceptions import InterfaceNotConfigured -from kestrel.mapping.data_model import load_default_mapping - - -PROFILE_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "sqlalchemy.yaml" -PROFILE_PATH_ENV_VAR = "KESTREL_SQLALCHEMY_CONFIG" - -_logger = logging.getLogger(__name__) - - -@dataclass -class 
Connection(DataClassJSONMixin): - url: str # SQLAlchemy "connection URL" or "connection string" - - -@dataclass -class Table(DataClassJSONMixin): - connection: str - timestamp: str - timestamp_format: str - data_model_mapping: Optional[str] = None # Filename for mapping - data_model_map: Mapping = field(default_factory=dict) - - def __post_init__(self): - if self.data_model_mapping: - with open(self.data_model_mapping, "r") as fp: - self.data_model_map = yaml.safe_load(fp) - else: - # Default to the built-in ECS mapping - self.data_model_map = load_default_mapping("ecs") # FIXME: need a default? - - -@dataclass -class Config(DataClassJSONMixin): - connections: Dict[str, Connection] - tables: Dict[str, Table] - - def __post_init__(self): - self.connections = {k: Connection(**v) for k, v in self.connections.items()} - self.tables = {k: Table(**v) for k, v in self.tables.items()} - - -def load_config(): - try: - return Config(**load_user_config(PROFILE_PATH_ENV_VAR, PROFILE_PATH_DEFAULT)) - except TypeError: - raise InterfaceNotConfigured() diff --git a/packages-nextgen/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/interface.py b/packages-nextgen/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/interface.py deleted file mode 100644 index 6197ab5e..00000000 --- a/packages-nextgen/kestrel_interface_sqlalchemy/src/kestrel_interface_sqlalchemy/interface.py +++ /dev/null @@ -1,268 +0,0 @@ -import logging -from functools import reduce -from typing import Callable, Iterable, Mapping, Optional -from uuid import UUID - -from pandas import DataFrame, read_sql -import sqlalchemy -from sqlalchemy import and_, column, or_ -from sqlalchemy.sql.elements import BooleanClauseList -from sqlalchemy.sql.expression import ColumnClause -from typeguard import typechecked - -from kestrel.display import GraphletExplanation -from kestrel.interface import AbstractInterface -from kestrel.interface.codegen.sql import SqlTranslator, comp2func -from kestrel.ir.filter import ( - BoolExp, - ExpOp, - FComparison, - MultiComp, - StrComparison, - StrCompOp, -) -from kestrel.ir.graph import IRGraphEvaluable -from kestrel.ir.instructions import ( - DataSource, - Filter, - Instruction, - ProjectAttrs, - ProjectEntity, - Return, - SolePredecessorTransformingInstruction, - SourceInstruction, - TransformingInstruction, - Variable, -) -from kestrel.mapping.data_model import ( - translate_comparison_to_native, - translate_dataframe, - translate_projection_to_native, -) - -from kestrel_interface_sqlalchemy.config import load_config - - -_logger = logging.getLogger(__name__) - - -@typechecked -class SQLAlchemyTranslator(SqlTranslator): - def __init__( - self, - dialect: sqlalchemy.engine.default.DefaultDialect, - timefmt: Callable, - timestamp: str, - from_obj: sqlalchemy.FromClause, - dmm: dict, - ): - super().__init__(dialect, timefmt, timestamp, from_obj) - self.dmm = dmm - self.proj = None - self.entity_type = None - - @typechecked - def _render_comp(self, comp: FComparison): - prefix = ( - f"{self.entity_type}." 
- if (self.entity_type and comp.field != self.timestamp) - else "" - ) - ocsf_field = f"{prefix}{comp.field}" - comps = translate_comparison_to_native( - self.dmm, ocsf_field, comp.op, comp.value - ) - translated_comps = [] - for comp in comps: - field, op, value = comp - col: ColumnClause = column(field) - if op == StrCompOp.NMATCHES: - tmp = ~comp2func[op](col, value) - else: - tmp = comp2func[op](col, value) - translated_comps.append(tmp) - return reduce(or_, translated_comps) - - @typechecked - def _render_multi_comp(self, comps: MultiComp): - op = and_ if comps.op == ExpOp.AND else or_ - return reduce(op, map(self._render_comp, comps.comps)) - - # This is copied verbatim from sql.py but we need to supply our own _render_comp - def _render_exp(self, exp: BoolExp) -> BooleanClauseList: - if isinstance(exp.lhs, BoolExp): - lhs = self._render_exp(exp.lhs) - elif isinstance(exp.lhs, MultiComp): - lhs = self._render_multi_comp(exp.lhs) - else: - lhs = self._render_comp(exp.lhs) - if isinstance(exp.rhs, BoolExp): - rhs = self._render_exp(exp.rhs) - elif isinstance(exp.rhs, MultiComp): - rhs = self._render_multi_comp(exp.rhs) - else: - rhs = self._render_comp(exp.rhs) - return and_(lhs, rhs) if exp.op == ExpOp.AND else or_(lhs, rhs) - - @typechecked - def _add_filter(self) -> Optional[str]: - if not self.filt: - return None - filt = self.filt - if filt.timerange.start: - # Convert the timerange to the appropriate pair of comparisons - start_comp = StrComparison( - self.timestamp, ">=", self.timefmt(filt.timerange.start) - ) - stop_comp = StrComparison( - self.timestamp, "<", self.timefmt(filt.timerange.stop) - ) - # AND them together - time_exp = BoolExp(start_comp, ExpOp.AND, stop_comp) - # AND that with any existing filter expression - exp = BoolExp(filt.exp, ExpOp.AND, time_exp) - else: - exp = filt.exp - if isinstance(exp, BoolExp): - comp = self._render_exp(exp) - elif isinstance(exp, MultiComp): - comp = self._render_multi_comp(exp) - else: - comp = self._render_comp(exp) - self.query = self.query.where(comp) - - def add_Filter(self, filt: Filter) -> None: - # Just save filter and compile it later - # Probably need the entity projection set first - self.filt = filt - - def add_ProjectAttrs(self, proj: ProjectAttrs) -> None: - self.proj = proj - - def add_ProjectEntity(self, proj: ProjectEntity) -> None: - self.entity_type = proj.entity_type - - def result(self) -> sqlalchemy.Compiled: - proj = self.proj.attrs if self.proj else None - pairs = translate_projection_to_native(self.dmm, self.entity_type, proj) - cols = [sqlalchemy.column(i).label(j) for i, j in pairs] - self._add_filter() - self.query = self.query.with_only_columns(*cols) # TODO: mapping? 
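-        # Compiling against the dialect captured in __init__ is what renders
-        # backend-appropriate SQL here; str() of the returned Compiled object
-        # yields the final statement text that evaluate_graph() hands to
-        # read_sql().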
- return self.query.compile(dialect=self.dialect) - - -class SQLAlchemyInterface(AbstractInterface): - def __init__( - self, - serialized_cache_catalog: Optional[str] = None, - session_id: Optional[UUID] = None, - ): - _logger.debug("SQLAlchemyInterface: loading config") - super().__init__(serialized_cache_catalog, session_id) - self.config = load_config() - self.schemas: dict = {} # Schema per table (index) - self.engines: dict = {} # Map of conn name -> engine - self.conns: dict = {} # Map of conn name -> connection - for info in self.config.tables.values(): - name = info.connection - conn_info = self.config.connections[name] - if name not in self.engines: - self.engines[name] = sqlalchemy.create_engine(conn_info.url) - if name not in self.conns: - engine = self.engines[name] - self.conns[name] = engine.connect() - _logger.debug("SQLAlchemyInterface: configured %s", name) - - @staticmethod - def schemes() -> Iterable[str]: - return ["sqlalchemy"] - - def store( - self, - instruction_id: UUID, - data: DataFrame, - ): - raise NotImplementedError("SQLAlchemyInterface.store") # TEMP - - def evaluate_graph( - self, - graph: IRGraphEvaluable, - instructions_to_evaluate: Optional[Iterable[Instruction]] = None, - ) -> Mapping[UUID, DataFrame]: - mapping = {} - if not instructions_to_evaluate: - instructions_to_evaluate = graph.get_sink_nodes() - for instruction in instructions_to_evaluate: - translator = self._evaluate_instruction_in_graph(graph, instruction) - # TODO: may catch error in case evaluation starts from incomplete SQL - sql = translator.result() - _logger.debug("SQL query generated: %s", sql) - # Get the "from" table for this query - tables = translator.query.selectable.get_final_froms() - table = tables[0].name # TODO: what if there's more than 1? - # Get the data source's SQLAlchemy connection object - conn = self.conns[self.config.tables[table].connection] - df = read_sql(sql, conn) - dmm = translator.dmm[ - translator.entity_type - ] # TODO: need a method for this? 
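-            # The native result columns were already aliased to OCSF names in
-            # result(); translate_dataframe() additionally applies the entity's
-            # value transformers (e.g. deriving file.name from a full image
-            # path) before the DataFrame is returned.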
-            mapping[instruction.id] = translate_dataframe(df, dmm)
-        return mapping
-
-    def explain_graph(
-        self,
-        graph: IRGraphEvaluable,
-        instructions_to_explain: Optional[Iterable[Instruction]] = None,
-    ) -> Mapping[UUID, GraphletExplanation]:
-        mapping = {}
-        if not instructions_to_explain:
-            instructions_to_explain = graph.get_sink_nodes()
-        for instruction in instructions_to_explain:
-            translator = self._evaluate_instruction_in_graph(graph, instruction)
-            dep_graph = graph.duplicate_dependent_subgraph_of_node(instruction)
-            graph_dict = dep_graph.to_dict()
-            query_stmt = translator.result()
-            mapping[instruction.id] = GraphletExplanation(graph_dict, query_stmt)
-        return mapping
-
-    def _evaluate_instruction_in_graph(
-        self,
-        graph: IRGraphEvaluable,
-        instruction: Instruction,
-    ) -> SQLAlchemyTranslator:
-        _logger.debug("instruction: %s", str(instruction))
-        translator = None
-        if isinstance(instruction, TransformingInstruction):
-            trunk, _r2n = graph.get_trunk_n_branches(instruction)
-            translator = self._evaluate_instruction_in_graph(graph, trunk)
-
-            if isinstance(instruction, SolePredecessorTransformingInstruction):
-                if isinstance(instruction, Return):
-                    pass
-                elif isinstance(instruction, Variable):
-                    pass
-                else:
-                    translator.add_instruction(instruction)
-
-            elif isinstance(instruction, Filter):
-                translator.add_instruction(instruction)
-
-            else:
-                raise NotImplementedError(f"Unknown instruction type: {instruction}")
-
-        elif isinstance(instruction, SourceInstruction):
-            if isinstance(instruction, DataSource):
-                ds = self.config.tables[instruction.datasource]
-                connection = ds.connection
-                dialect = self.engines[connection].dialect
-                translator = SQLAlchemyTranslator(
-                    dialect,
-                    lambda dt: dt.strftime(ds.timestamp_format),
-                    ds.timestamp,
-                    sqlalchemy.table(instruction.datasource),
-                    ds.data_model_map,
-                )
-            else:
-                raise NotImplementedError(f"Unhandled instruction type: {instruction}")
-
-        return translator
diff --git a/packages-nextgen/kestrel_interface_sqlalchemy/tests/test_config.py b/packages-nextgen/kestrel_interface_sqlalchemy/tests/test_config.py
deleted file mode 100644
index a19d97a6..00000000
--- a/packages-nextgen/kestrel_interface_sqlalchemy/tests/test_config.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import os
-
-import yaml
-
-from kestrel_interface_sqlalchemy.config import (
-    PROFILE_PATH_ENV_VAR,
-    Connection,
-    load_config,
-)
-
-
-def test_load_config(tmp_path):
-    config = {
-        "connections": {
-            "localhost": {
-                "url": "sqlite:////home/jdoe/test.db",
-            },
-            "some-data-lake": {
-                "url": "presto://jdoe@example.com:8889/hive",
-            }
-        },
-        "tables": {
-            "cloud_table": {
-                "connection": "some-data-lake",
-                "timestamp": "eventTime",
-                "timestamp_format": "%Y-%m-%d %H:%M:%S.%f",
-                "data_model_mapping": str(tmp_path / "mapping.yaml")
-            }
-        }
-    }
-    map_file = tmp_path / "mapping.yaml"
-    with open(map_file, 'w') as fp:
-        fp.write("some.field: other.field\n")
-    config_file = tmp_path / "sqlalchemy.yaml"
-    with open(config_file, 'w') as fp:
-        yaml.dump(config, fp)
-    os.environ[PROFILE_PATH_ENV_VAR] = str(config_file)
-    read_config = load_config()
-    conn: Connection = read_config.connections["localhost"]
-    assert conn.url == config["connections"]["localhost"]["url"]
-    assert read_config.connections["localhost"].url == config["connections"]["localhost"]["url"]
-    assert read_config.tables["cloud_table"].timestamp == config["tables"]["cloud_table"]["timestamp"]
diff --git a/packages-nextgen/kestrel_jupyter/README.rst b/packages-nextgen/kestrel_jupyter/README.rst
deleted file mode 120000
index c768ff7d..00000000
--- a/packages-nextgen/kestrel_jupyter/README.rst
+++ /dev/null
@@ -1 +0,0 @@
-../../README.rst
\ No newline at end of file
diff --git a/packages-nextgen/kestrel_jupyter/pyproject.toml b/packages-nextgen/kestrel_jupyter/pyproject.toml
deleted file mode 100644
index 3cc31435..00000000
--- a/packages-nextgen/kestrel_jupyter/pyproject.toml
+++ /dev/null
@@ -1,56 +0,0 @@
-[build-system]
-requires = ["setuptools >= 68.2.2", "wheel"]
-build-backend = "setuptools.build_meta"
-
-[project]
-name = "kestrel_jupyter"
-version = "2.0.0"
-description = "Kestrel Jupyter Kernel"
-readme = "README.rst"
-requires-python = ">=3.8"
-license = {text = "Apache 2.0 License"}
-maintainers = [
-    {name = "Xiaokui Shu", email = "xiaokui.shu@ibm.com"},
-    {name = "Paul Coccoli", email = "pcoccoli@us.ibm.com"},
-]
-keywords = [
-    "kestrel",
-    "Jupyter",
-    "kernel",
-]
-classifiers = [
-    "Topic :: Security",
-    "Operating System :: OS Independent",
-    "Development Status :: 4 - Beta",
-    "Programming Language :: Python :: 3",
-]
-
-dependencies = [
-    "kestrel_core==2.0.0",
-    "jupyterlab-server",
-    "jupyterlab",
-    "jupyter_client",
-    "nbclassic",
-    "sqlparse==0.4.4",
-    "pygments==2.17.2",
-    "matplotlib==3.8.3",
-]
-
-[project.optional-dependencies]
-test = [
-    "pytest",
-]
-
-[project.urls]
-Homepage = "https://github.com/opencybersecurityalliance/kestrel-lang"
-Documentation = "https://kestrel.readthedocs.io/"
-Repository = "https://github.com/opencybersecurityalliance/kestrel-lang.git"
-
-[project.scripts]
-kestrel_jupyter_setup = "kestrel_jupyter_kernel.setup:run"
-
-[tool.setuptools.packages.find]
-where = ["src"]
-
-[tool.setuptools.package-data]
-"*" = ["*.js"]
diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_ipython/__init__.py b/packages-nextgen/kestrel_jupyter/src/kestrel_ipython/__init__.py
deleted file mode 100644
index b79424d7..00000000
--- a/packages-nextgen/kestrel_jupyter/src/kestrel_ipython/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-import kestrel_ipython.magic
diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_ipython/magic.py b/packages-nextgen/kestrel_jupyter/src/kestrel_ipython/magic.py
deleted file mode 100644
index aeac9c38..00000000
--- a/packages-nextgen/kestrel_jupyter/src/kestrel_ipython/magic.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import sys
-import re
-
-from IPython.core.magic import (
-    line_cell_magic,
-    Magics,
-    magics_class,
-)
-
-from kestrel.session import Session
-
-
-@magics_class
-class KestrelMagic(Magics):
-    def __init__(self, shell=None, config=None, user_magics=None, **traits):
-        super().__init__(shell=shell, config=config, user_magics=user_magics, **traits)
-        self.session = None
-
-    def __check_magic(self, line="", cell=None):
-        """
-        Some non-Kestrel commands to handle separately for initializing the session.
-        This likely includes how to connect to UDI, ATK, and other parameters.
- """ - # regex is a simple hack - r = r"^\s*(session)\s+(init)\s*(true|false)?\s*$" - m = re.match(r, line, re.IGNORECASE) - if m is None: - return False - stderr = m.groups()[2] is not None and m.groups()[2].lower() == "true" - self.session = Session(stderr) - return True - - @line_cell_magic - def kestrel(self, line="", cell=None): - """ - session init [true / false] - """ - if self.__check_magic(line, cell): - if len(line) > 0: - line = "" - if cell is None: - return - - if self.session is None: - self.session = Session() - if len(line) == 0 and cell is None: - sys.stderr.write("Need to provide a Kestrel query to execute") - return None - if cell is None: - # assert cell is None - return self.session.execute(line) - else: - sys.stderr.write(repr(cell)) - if len(line) != 0: - self.session.execute(line) - return self.session.execute(cell) - # indx = line.lower().find('as df') - # if indx != -1: - # return pd.DataFrame.from_records(self.session.execute(line[:indx])[0]) - # else: return self.session.execute(line) - - -ip = get_ipython() -ip.register_magics(KestrelMagic) diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/__init__.py b/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/__init__.py deleted file mode 100644 index e25addab..00000000 --- a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from kestrel_jupyter_kernel.kernel import KestrelKernel diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/__main__.py b/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/__main__.py deleted file mode 100644 index 5eebb1a3..00000000 --- a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/__main__.py +++ /dev/null @@ -1,5 +0,0 @@ -from ipykernel.kernelapp import IPKernelApp -from kestrel_jupyter_kernel import KestrelKernel - -if __name__ == "__main__": - IPKernelApp.launch_instance(kernel_class=KestrelKernel) diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/__init__.py b/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/kestrel_template.js b/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/kestrel_template.js deleted file mode 100644 index 9a9ac5cf..00000000 --- a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/kestrel_template.js +++ /dev/null @@ -1,153 +0,0 @@ -(function(mod) { - if (typeof exports == "object" && typeof module == "object") // CommonJS - mod(require("../../lib/codemirror")); - else if (typeof define == "function" && define.amd) // AMD - define(["../../lib/codemirror"], mod); - else // Plain browser env - mod(CodeMirror); -})(function(CodeMirror) { - "use strict"; - - CodeMirror.defineMode("kestrel", function() { - - function switchState(source, setState, f) { - setState(f); - return f(source, setState); - } - - var smallRE = /[a-z_]/; - var largeRE = /[A-Z]/; - var digitRE = /[0-9]/; - var hexitRE = /[0-9A-Fa-f]/; - var octitRE = /[0-7]/; - var idRE = /[a-z_A-Z0-9\']/; - var typeRE = /[a-zA-Z0-9-]/; - var symbolRE = /[-!#$%&*+.\/<=>?@\\^|~:]/; - var specialRE = /[(),;[\]`{}]/; - var whiteCharRE = /[ \t\v\f]/; // newlines are handled in tokenizer - var isoTimestamp = /[0-9:.\-TZ]/; - - function normal() { - return function (source, setState) { - if (source.eatWhile(whiteCharRE)) { - return null; - } - - var ch = 
source.next(); - - if (ch == '#') { - source.skipToEnd(); - return "comment"; - } - - if (ch == '\'') { - return switchState(source, setState, stringLiteral); - } - - if (ch == 't') { - if (source.eat('\'')) { - source.eatWhile(isoTimestamp); - if (source.eat('\'')) { - return "string-2"; - } - } - } - - if (typeRE.test(source)) { - source.eatWhile(typeRE); - return "type"; - } - - if (largeRE.test(ch)) { - source.eatWhile(idRE); - return "error"; - } - - if (smallRE.test(ch)) { - source.eatWhile(idRE); - return "variable"; - } - - if (digitRE.test(ch)) { - if (ch == '0') { - if (source.eat(/[xX]/)) { - source.eatWhile(hexitRE); // should require at least 1 - return "integer"; - } - if (source.eat(/[oO]/)) { - source.eatWhile(octitRE); // should require at least 1 - return "number"; - } - } - source.eatWhile(digitRE); - var t = "number"; - if (source.eat('.')) { - t = "number"; - source.eatWhile(digitRE); // should require at least 1 - } - if (source.eat(/[eE]/)) { - t = "number"; - source.eat(/[-+]/); - source.eatWhile(digitRE); // should require at least 1 - } - return t; - } - - if (symbolRE.test(ch)) { - if (ch == '#') { - source.skipToEnd(); - return "comment"; - } - } - - return "error"; - } - } - - function stringLiteral(source, setState) { - while (!source.eol()) { - var ch = source.next(); - if (ch == '\'') { - setState(normal()); - return "string"; - } - // escape handling: need to test correctness - //if (ch == '\\') { - // if (source.eat('\'')) source.next(); - //} - } - setState(normal()); - return "error"; - } - - var wellKnownWords = (function() { - var wkw = {}; - - var keywords = <<>>; - - for (var i = keywords.length; i--;) - wkw[keywords[i]] = "keyword"; - - var ops = ["IN", "NOT", "LIKE", "MATCHES", "ISSUBSET", "in", "not", "like", "matches", "isubset", "=", "!=", "<", ">", "<=", ">=",]; - - for (var i = ops.length; i--;) - wkw[ops[i]] = "operator"; - - return wkw; - })(); - - return { - startState: function () { return { f: normal() }; }, - copyState: function (s) { return { f: s.f }; }, - - token: function(stream, state) { - var t = state.f(stream, function(s) { state.f = s; }); - var w = stream.current(); - return (wellKnownWords.hasOwnProperty(w)) ? 
wellKnownWords[w] : t; - } - }; - - }); - - CodeMirror.defineMIME("text/x-kestrel", "kestrel"); -}); diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/setup.py b/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/setup.py deleted file mode 100644 index 944569fd..00000000 --- a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/codemirror/setup.py +++ /dev/null @@ -1,53 +0,0 @@ -import os -import json -import nbclassic -import notebook -import pkgutil -import kestrel - - -def update_codemirror_mode(): - for codemirror_file_path in _get_codemirror_file_paths(): - src_current = "" - if os.path.isfile(codemirror_file_path): - try: - with open(codemirror_file_path) as fp: - src_current = fp.read() - except PermissionError: - pass - - src_latest = _instantiate_codemirror_mode_src() - - if src_latest != src_current: - try: - with open(codemirror_file_path, "w") as fp: - fp.write(src_latest) - except PermissionError: - pass - - -################################################################ -# Private Functions -################################################################ - - -def _get_codemirror_file_paths(): - paths = [] - for pkg_path in (notebook.__path__[0], nbclassic.__path__[0]): - codemirror_dir = os.path.join(pkg_path, "static/components/codemirror/mode") - if os.path.isdir(codemirror_dir): - kestrel_dir = os.path.join(codemirror_dir, "kestrel") - if not os.path.isdir(kestrel_dir): - try: - os.mkdir(kestrel_dir) - except PermissionError: - pass - paths.append(os.path.join(kestrel_dir, "kestrel.js")) - return paths - - -def _instantiate_codemirror_mode_src(): - keywords = json.dumps(kestrel.frontend.parser.get_keywords()) - codemirror_src = pkgutil.get_data(__name__, "kestrel_template.js").decode("utf-8") - codemirror_src = codemirror_src.replace("<<>>", keywords) - return codemirror_src diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/config.py b/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/config.py deleted file mode 100644 index 83d6c93f..00000000 --- a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/config.py +++ /dev/null @@ -1 +0,0 @@ -LOG_FILE_NAME = "session.log" diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/display.py b/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/display.py deleted file mode 100644 index 21e10883..00000000 --- a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/display.py +++ /dev/null @@ -1,68 +0,0 @@ -from pandas import DataFrame -import tempfile -import base64 -import sqlparse -from typing import Iterable, Mapping -from pygments import highlight -from pygments.lexers import guess_lexer -from pygments.lexers.sql import SqlLexer -from pygments.lexers.kusto import KustoLexer -from pygments.formatters import HtmlFormatter -import networkx as nx -import matplotlib.pyplot as plt - -from kestrel.display import Display, GraphExplanation -from kestrel.ir.graph import IRGraph -from kestrel.ir.instructions import Instruction, DataSource, Variable, Construct - - -def gen_label_mapping(g: IRGraph) -> Mapping[Instruction, str]: - d = {} - for n in g: - if isinstance(n, Variable): - d[n] = n.name - elif isinstance(n, Construct): - d[n] = n.id.hex[:4] - elif isinstance(n, DataSource): - d[n] = n.datasource - else: - d[n] = f"[{n.instruction.upper()}]" - return d - - -def to_html_blocks(d: Display) -> Iterable[str]: - if isinstance(d, DataFrame): - yield d.to_html() - elif isinstance(d, GraphExplanation): - 
for graphlet in d.graphlets: - graph = IRGraph(graphlet.graph) - plt.figure(figsize=(4, 2)) - nx.draw( - graph, - with_labels=True, - labels=gen_label_mapping(graph), - font_size=8, - node_size=260, - node_color="#bfdff5", - ) - with tempfile.NamedTemporaryFile(delete_on_close=False) as tf: - tf.close() - plt.savefig(tf.name, format="png") - with open(tf.name, "rb") as tfx: - data = tfx.read() - - img = data_uri = base64.b64encode(data).decode("utf-8") - imgx = f'' - yield imgx - - query = graphlet.query.statement - if graphlet.query.language == "SQL": - lexer = SqlLexer() - query = sqlparse.format(query, reindent=True, keyword_case="upper") - elif graphlet.query.language == "KQL": - lexer = KustoLexer() - else: - lexer = guess_lexer(query) - query = highlight(query, lexer, HtmlFormatter()) - style = "" - yield style + query diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/kernel.py b/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/kernel.py deleted file mode 100644 index 456cde96..00000000 --- a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/kernel.py +++ /dev/null @@ -1,60 +0,0 @@ -from ipykernel.kernelbase import Kernel -import logging -import networkx as nx - -from kestrel.session import Session -from kestrel_jupyter_kernel.display import to_html_blocks - - -_logger = logging.getLogger(__name__) - - -class KestrelKernel(Kernel): - implementation = "kestrel" - implementation_version = "2.0" - language = "kestrel" - language_version = "2.0" - # https://jupyter-client.readthedocs.io/en/stable/messaging.html#msging-kernel-info - language_info = {"name": "kestrel", "file_extension": ".hf"} - banner = "Kestrel" - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.kestrel_session = Session() - - def do_complete(self, code, cursor_pos): - return { - "matches": self.kestrel_session.do_complete(code, cursor_pos), - "cursor_end": cursor_pos, - "cursor_start": cursor_pos, - "metadata": {}, - "status": "ok", - } - - def do_execute( - self, code, silent, store_history=True, user_expressions=None, allow_stdin=False - ): - if not silent: - try: - for result in self.kestrel_session.execute_to_generate(code): - for html in to_html_blocks(result): - self.send_response( - self.iopub_socket, - "display_data", - {"data": {"text/html": html}, "metadata": {}}, - ) - # how to clear output (if needed in the future): - # self.send_response(self.iopub_socket, "clear_output") - - except Exception as e: - _logger.error("Exception occurred", exc_info=True) - self.send_response( - self.iopub_socket, "stream", {"name": "stderr", "text": str(e)} - ) - - return { - "status": "ok", - "execution_count": self.execution_count, - "payload": [], - "user_expressions": {}, - } diff --git a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/setup.py b/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/setup.py deleted file mode 100644 index f0c884c4..00000000 --- a/packages-nextgen/kestrel_jupyter/src/kestrel_jupyter_kernel/setup.py +++ /dev/null @@ -1,50 +0,0 @@ -################################################################ -# Setup Kestrel Jupyter Kernel -# -# This module setups the Kestrel Jupyter kernel: -# 1. install the kernel to Jupyter environment (local env) -# 2. generate codemirror mode for Kestrel based on the -# installed kestrel Python package for syntax highlighting -# 3. 
install the codemirror mode into Jupyter -# -# Install: pip will install the utility `kestrel_jupyter_setup` -# -# Usage: `kestrel_jupyter_setup` -# -################################################################ - -import os -import tempfile -import json -from jupyter_client.kernelspec import KernelSpecManager -from kestrel_jupyter_kernel.codemirror.setup import update_codemirror_mode - -_KERNEL_SPEC = { - "argv": ["python3", "-m", "kestrel_jupyter_kernel", "-f", "{connection_file}"], - "display_name": "Kestrel", - "language": "kestrel", -} - - -def install_kernelspec(): - with tempfile.TemporaryDirectory() as tmp_dirname: - kernel_dirname = os.path.join(tmp_dirname, "kestrel_kernel") - os.mkdir(kernel_dirname) - kernel_filename = os.path.join(kernel_dirname, "kernel.json") - with open(kernel_filename, "w") as kf: - json.dump(_KERNEL_SPEC, kf) - - m = KernelSpecManager() - m.install_kernel_spec(kernel_dirname, "kestrel", user=True) - - -def run(): - print("Setup Kestrel Jupyter Kernel") - print(" Install new Jupyter kernel ...", end=" ") - install_kernelspec() - print("done") - - # generate and install kestrel codemirrmor mode - print(" Compute and install syntax highlighting ...", end=" ") - update_codemirror_mode() - print("done") diff --git a/packages-nextgen/kestrel_jupyter/tests/__init__.py b/packages-nextgen/kestrel_jupyter/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/packages-nextgen/kestrel_jupyter/tests/test_kernel_install.py b/packages-nextgen/kestrel_jupyter/tests/test_kernel_install.py deleted file mode 100644 index faa29375..00000000 --- a/packages-nextgen/kestrel_jupyter/tests/test_kernel_install.py +++ /dev/null @@ -1,13 +0,0 @@ -from jupyter_client.kernelspec import KernelSpecManager - -from kestrel_jupyter_kernel.setup import install_kernelspec - - -def test_kernel_install(): - m = KernelSpecManager() - ks = m.get_all_specs() - if "kestrel" in ks: - m.remove_kernel_spec("kestrel") - - install_kernelspec() - assert "kestrel" in m.get_all_specs() diff --git a/packages-nextgen/kestrel_jupyter/tests/test_notebook_syntax_gen.py b/packages-nextgen/kestrel_jupyter/tests/test_notebook_syntax_gen.py deleted file mode 100644 index 8511a28a..00000000 --- a/packages-nextgen/kestrel_jupyter/tests/test_notebook_syntax_gen.py +++ /dev/null @@ -1,13 +0,0 @@ -from os.path import exists - -from kestrel_jupyter_kernel.codemirror.setup import ( - update_codemirror_mode, - _get_codemirror_file_paths, -) - - -def test_notebook_syntax_gen(): - js_paths = _get_codemirror_file_paths() - update_codemirror_mode() - for js_path in js_paths: - assert exists(js_path) diff --git a/packages/kestrel_datasource_stixshifter/pyproject.toml b/packages/kestrel_datasource_stixshifter/pyproject.toml index 05e831f7..ecc22b3e 100644 --- a/packages/kestrel_datasource_stixshifter/pyproject.toml +++ b/packages/kestrel_datasource_stixshifter/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "kestrel_datasource_stixshifter" -version = "1.8.2" +version = "1.8.3" description = "Kestrel STIX-shifter Datasource Interface" readme = "README.rst" requires-python = ">=3.8" @@ -28,7 +28,7 @@ classifiers = [ dependencies = [ "kestrel_core>=1.8.1", - "lxml>=5.2.1", + "lxml==4.9.4", # Python 3.8 on mac error >5.0.0; stackoverflow #75442675 "requests>=2.31.0", "nest-asyncio>=1.6.0", "stix-shifter==7.0.6", diff --git a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/cli.py 
b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/cli.py index 3cc90fb5..b4cc2df6 100644 --- a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/cli.py +++ b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/cli.py @@ -1,18 +1,22 @@ import argparse import datetime import logging +import sys from kestrel_datasource_stixshifter.diagnosis import Diagnosis from kestrel_datasource_stixshifter.connector import setup_connector_module from firepit.timestamp import timefmt -def default_patterns(use_now_as_stop_time: bool): - start_time = "START t'2000-01-01T00:00:00.000Z'" - stop_time = ( - f"STOP t'{timefmt(datetime.datetime.utcnow())}'" - if use_now_as_stop_time - else "STOP t'3000-01-01T00:00:00.000Z'" - ) +def default_patterns(start=None, stop=None, last_minutes=0): + if start: + start_time = f"START t'{start}'" + stop_time = f"STOP t'{stop}'" + else: + to_time = datetime.datetime.utcnow() + from_time = timefmt(to_time - datetime.timedelta(minutes=last_minutes)) + to_time = timefmt(to_time) + start_time = f"START t'{from_time}'" + stop_time = f"STOP t'{to_time}'" patterns = [ "[ipv4-addr:value != '255.255.255.255']", "[process:pid > 0]", @@ -45,9 +49,23 @@ def stix_shifter_diag(): ) parser.add_argument( "--stop-at-now", - help="use the current timestamp as the STOP time instead of default year 3000 for default patterns", + help="ignored (retained for backwards compatibility)", action="store_true", ) + parser.add_argument( + "--start", + help="start time for default pattern search (%Y-%m-%dT%H:%M:%S.%fZ)", + ) + parser.add_argument( + "--stop", + help="stop time for default pattern search (%Y-%m-%dT%H:%M:%S.%fZ)", + ) + parser.add_argument( + "--last-minutes", + help="relative timespan for default pattern searches in minutes", + default=5, + type=int, + ) parser.add_argument( "-t", "--translate-only", @@ -68,13 +86,21 @@ def stix_shifter_diag(): ch.setFormatter(formatter) logger.addHandler(ch) + if (args.start and not args.stop) or (args.stop and not args.start): + print( + "Must specify both --start and --stop for absolute time range; else use --last-minutes", + file=sys.stderr, + ) + parser.print_usage(sys.stderr) + sys.exit(1) + if args.stix_pattern: patterns = [args.stix_pattern] elif args.pattern_file: with open(args.pattern_file) as pf: patterns = [pf.read()] else: - patterns = default_patterns(args.stop_at_now) + patterns = default_patterns(args.start, args.stop, args.last_minutes) diag = Diagnosis(args.datasource) diff --git a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/config.py b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/config.py index 73eb8ff8..e0f4be9e 100644 --- a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/config.py +++ b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/config.py @@ -20,6 +20,7 @@ ENV_VAR_PREFIX = "STIXSHIFTER_" RETRIEVAL_BATCH_SIZE = 2000 SINGLE_BATCH_TIMEOUT = 60 +SUBQUERY_TIME_WINDOW_IN_SECONDS = 0 # if >0, then segment START/STOP into this Windows Size to file multiple subqueries COOL_DOWN_AFTER_TRANSMISSION = 0 ALLOW_DEV_CONNECTOR = False VERIFY_CERT = True @@ -184,6 +185,14 @@ def get_datasource_from_profiles(profile_name, profiles): profile_name, ) + subquery_time_window = _extract_param_from_connection_config( + "subquery_time_window", + int, + SUBQUERY_TIME_WINDOW_IN_SECONDS, + connection, + profile_name, + ) + return ( connector_name, connection, @@ -192,6 
+201,7 @@ def get_datasource_from_profiles(profile_name, profiles): cool_down_after_transmission, allow_dev_connector, verify_cert, + subquery_time_window, ) diff --git a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/diagnosis.py b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/diagnosis.py index c3631f7a..c725e543 100644 --- a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/diagnosis.py +++ b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/diagnosis.py @@ -30,6 +30,7 @@ def __init__(self, datasource_name): self.cool_down_after_transmission, self.allow_dev_connector, self.verify_cert, + self.subquery_time_window, ) = get_datasource_from_profiles(datasource_name, self.profiles) self.if_fast_translation = ( self.connector_name in self.kestrel_options["fast_translate"] @@ -50,6 +51,10 @@ def diagnose_config(self): print("#### Kestrel specific config") print(f"retrieval batch size: {self.retrieval_batch_size}") print(f"cool down after transmission: {self.cool_down_after_transmission}") + print(f"allow unverified connector: {self.allow_dev_connector}") + print(f"verify SSL or not: {self.verify_cert}") + print(f"split query into subquery: {bool(self.subquery_time_window)}") + print(f"subquery with time window (in seconds): {self.subquery_time_window}") print(f"enable fast translation: {self.if_fast_translation}") print() diff --git a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/interface.py b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/interface.py index 9435cebe..3c2c43a5 100644 --- a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/interface.py +++ b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/interface.py @@ -32,6 +32,7 @@ retrieval_batch_size: 10000 # set to 10000 to match default Elasticsearch page size; Kestrel default across connectors: 2000 single_batch_timeout: 120 # increase it if hit 60 seconds (Kestrel default) timeout error for each batch of retrieval cool_down_after_transmission: 2 # seconds to cool down between data source API calls, required by some API such as sentinelone; Kestrel default: 0 + subquery_time_window: 3600 # split each query into multiple subqueries with smaller time windows specified here in seconds; Kestrel default: 0 (not split query) allow_dev_connector: True # do not check version of a connector to allow custom/testing connector installed with any version; Kestrel default: False dialects: # more info: https://github.com/opencybersecurityalliance/stix-shifter/tree/develop/stix_shifter_modules/elastic_ecs#dialects - beats # need it if the index is created by Filebeat/Winlogbeat/*beat diff --git a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/query.py b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/query.py index 46b07b7f..603dc67e 100644 --- a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/query.py +++ b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/query.py @@ -11,6 +11,7 @@ from kestrel.exceptions import DataSourceError, DataSourceManagerInternalError from kestrel_datasource_stixshifter.connector import setup_connector_module from kestrel_datasource_stixshifter import multiproc +from kestrel_datasource_stixshifter.subquery import split_subquery_by_time_window from kestrel_datasource_stixshifter.config import ( 
     get_datasource_from_profiles,
     load_options,
@@ -63,16 +64,16 @@ def query_datasource(uri, pattern, session_id, config, store, limit=None):
     _logger.debug(f"prepare query with ID: {query_id}")
 
     num_records = 0
-    profile_limit = limit
+    limit_per_profile = limit
     for profile in profiles:
         if limit:
             if num_records >= limit:
                 break
             if num_records > 0:
-                profile_limit = limit - num_records
+                limit_per_profile = limit - num_records
         _logger.debug(f"entering stix-shifter data source: {profile}")
-        _logger.debug(f"profile = {profile}, profile_limit = {profile_limit}")
+        _logger.debug(f"profile = {profile}, limit_per_profile = {limit_per_profile}")
         # STIX-shifter will alter the config objects, thus making them not reusable.
         # So only give STIX-shifter a copy of the configs.
         # Check `modernize` functions in the `stix_shifter_utils` for details.
@@ -84,6 +85,7 @@ def query_datasource(uri, pattern, session_id, config, store, limit=None):
             cool_down_after_transmission,
             allow_dev_connector,
             verify_cert,
+            subquery_time_window,
         ) = map(
             copy.deepcopy, get_datasource_from_profiles(profile, config["profiles"])
         )
@@ -98,43 +100,52 @@ def query_datasource(uri, pattern, session_id, config, store, limit=None):
 
         observation_metadata = gen_observation_metadata(connector_name, query_id)
 
-        dsl = translate_query(
-            connector_name, observation_metadata, pattern, connection_dict
-        )
+        for pattern in split_subquery_by_time_window(pattern, subquery_time_window):
+
+            if limit_per_profile:
+                if num_records >= limit_per_profile:
+                    _logger.debug("skip remaining subqueries: return limit reached")
+                    break
+                if num_records > 0:
+                    limit_per_profile = limit_per_profile - num_records
 
-        raw_records_queue = Queue()
-        translated_data_queue = Queue()
+            dsl = translate_query(
+                connector_name, observation_metadata, pattern, connection_dict
+            )
 
-        exceptions = []
+            raw_records_queue = Queue()
+            translated_data_queue = Queue()
 
-        with multiproc.translate(
-            connector_name,
-            observation_metadata,
-            connection_dict.get("options", {}),
-            cache_data_path_prefix,
-            connector_name in config["options"]["fast_translate"],
-            raw_records_queue,
-            translated_data_queue,
-            config["options"]["translation_workers_count"],
-        ):
-            with multiproc.transmit(
+            exceptions = []
+
+            with multiproc.translate(
                 connector_name,
-                connection_dict,
-                configuration_dict,
-                retrieval_batch_size,
-                config["options"]["translation_workers_count"],
-                cool_down_after_transmission,
-                verify_cert,
-                dsl["queries"],
+                observation_metadata,
+                connection_dict.get("options", {}),
+                cache_data_path_prefix,
+                connector_name in config["options"]["fast_translate"],
                 raw_records_queue,
-                profile_limit,
+                translated_data_queue,
+                config["options"]["translation_workers_count"],
             ):
-                for result in multiproc.read_translated_results(
-                    translated_data_queue,
+                with multiproc.transmit(
+                    connector_name,
+                    connection_dict,
+                    configuration_dict,
+                    retrieval_batch_size,
                     config["options"]["translation_workers_count"],
+                    cool_down_after_transmission,
+                    verify_cert,
+                    dsl["queries"],
+                    raw_records_queue,
+                    limit_per_profile,
                 ):
-                    num_records += get_num_objects(result)
-                    ingest(result, observation_metadata, query_id, store)
+                    for result in multiproc.read_translated_results(
+                        translated_data_queue,
+                        config["options"]["translation_workers_count"],
+                    ):
+                        num_records += get_num_objects(result)
+                        ingest(result, observation_metadata, query_id, store)
 
     return ReturnFromStore(query_id)
diff --git a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/subquery.py b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/subquery.py
new file mode 100644
index 00000000..116c4c34
--- /dev/null
+++ b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/subquery.py
@@ -0,0 +1,47 @@
+import logging
+from typeguard import typechecked
+from typing import Iterable
+from datetime import timedelta
+
+from firepit import timestamp
+
+
+_logger = logging.getLogger(__name__)
+
+
+@typechecked
+def split_subquery_by_time_window(
+    stix_pattern: str, time_win_unit_in_seconds: int
+) -> Iterable[str]:
+    if not time_win_unit_in_seconds:
+        _logger.debug("not using time-window-based subquery")
+        yield stix_pattern
+    else:
+        items = stix_pattern.split()
+        if items[-2] != "STOP" or items[-4] != "START":
+            # no timestamp in pattern
+            _logger.debug("not using subquery: no time range in pattern")
+            yield stix_pattern
+        else:
+            stop_entire = timestamp.to_datetime(items[-1][2:-1])
+            start_entire = timestamp.to_datetime(items[-3][2:-1])
+            stop = stop_entire
+            start = start_entire
+            time_window_unit = timedelta(seconds=time_win_unit_in_seconds)
+            while stop - time_window_unit > start_entire:
+                start = stop - time_window_unit
+                _items = items[:]
+                _items[-3] = f"t'{timestamp.timefmt(start)}'"
+                _items[-1] = f"t'{timestamp.timefmt(stop)}'"
+                subquery_pattern = " ".join(_items)
+                _logger.debug(f"subquery pattern generated: {subquery_pattern}")
+                yield subquery_pattern
+                stop = start
+            else:
+                start = start_entire
+                _items = items[:]
+                _items[-3] = f"t'{timestamp.timefmt(start)}'"
+                _items[-1] = f"t'{timestamp.timefmt(stop)}'"
+                subquery_pattern = " ".join(_items)
+                _logger.debug(f"subquery pattern generated: {subquery_pattern}")
+                yield subquery_pattern
diff --git a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/worker/transmitter.py b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/worker/transmitter.py
index 31534781..423d7566 100644
--- a/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/worker/transmitter.py
+++ b/packages/kestrel_datasource_stixshifter/src/kestrel_datasource_stixshifter/worker/transmitter.py
@@ -192,9 +192,16 @@ def retrieve_data(self):
                 )
 
                 # prepare for next round retrieval
-                result_retrieval_offset += len(result_batch["data"])
+                result_len = len(result_batch["data"])
+                result_retrieval_offset += result_len
+
+                if result_len < batch_size:
+                    has_remaining_results = False
+
                 if "metadata" in result_batch:
                     metadata = result_batch["metadata"]
+                else:
+                    has_remaining_results = False
 
                 if self.limit:
                     if result_retrieval_offset >= self.limit:
diff --git a/packages/kestrel_datasource_stixshifter/tests/test_command_get.py b/packages/kestrel_datasource_stixshifter/tests/test_command_get.py
index 058205f3..624eb09c 100644
--- a/packages/kestrel_datasource_stixshifter/tests/test_command_get.py
+++ b/packages/kestrel_datasource_stixshifter/tests/test_command_get.py
@@ -166,11 +166,8 @@ def test_get_multiple_stixshifter_stix_limit_1(set_no_prefetch_kestrel_config, s
     s.execute(stmt)
     v = s.get_variable("var")
 
-    # The extended graph [ipv4-addr:value = '127.0.0.1'] is recognized and
-    # merged to prefetch query, resultsing in limited (32) processes. If
-    # not used by prefetch, the total number of process records prefetched
-    # is 240.
-    assert len(v) == 28
+    # HOST1 returns 26, which is larger than 15
+    assert len(v) == 26
     for i in range(len(v)):
         assert v[i]["type"] == "process"
         assert v[i]["name"] in [
diff --git a/packages/kestrel_datasource_stixshifter/tests/test_stixshifter.py b/packages/kestrel_datasource_stixshifter/tests/test_stixshifter.py
index 610a513c..93722c41 100644
--- a/packages/kestrel_datasource_stixshifter/tests/test_stixshifter.py
+++ b/packages/kestrel_datasource_stixshifter/tests/test_stixshifter.py
@@ -79,6 +79,7 @@ def test_yaml_profiles_refresh(tmp_path):
         cool_down_after_transmission: 5
         allow_dev_connector: True
         verify_cert: false
+        subquery_time_window: 600
         dialects:
           - beats
     config:
@@ -107,7 +108,7 @@
     ss_config = s.config["datasources"]["kestrel_datasource_stixshifter"]
     ss_profiles = ss_config["profiles"]
 
-    connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, allow_dev_connector, verify_cert = get_datasource_from_profiles("host101", ss_profiles)
+    connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, allow_dev_connector, verify_cert, subquery_time_window = get_datasource_from_profiles("host101", ss_profiles)
     assert connector_name == "elastic_ecs"
     assert configuration["auth"]["id"] == "profileA"
     assert configuration["auth"]["api_key"] == "qwer"
@@ -116,6 +117,7 @@
     assert retrieval_batch_size == 2000
     assert cool_down_after_transmission == 0
     assert verify_cert == True
+    assert subquery_time_window == 0
 
     with open(profile_file, "w") as pf:
         pf.write(profileB)
@@ -124,7 +126,7 @@
 
     # need to refresh the pointers since the dict is updated
     ss_profiles = ss_config["profiles"]
-    connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, allow_dev_connector, verify_cert = get_datasource_from_profiles("host101", ss_profiles)
+    connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, allow_dev_connector, verify_cert, subquery_time_window = get_datasource_from_profiles("host101", ss_profiles)
     assert connector_name == "elastic_ecs"
     assert configuration["auth"]["id"] == "profileB"
     assert configuration["auth"]["api_key"] == "xxxxxx"
@@ -134,5 +136,6 @@
     assert cool_down_after_transmission == 5
     assert allow_dev_connector == True
     assert verify_cert == False
+    assert subquery_time_window == 600
 
     del os.environ["KESTREL_STIXSHIFTER_CONFIG"]
diff --git a/packages/kestrel_datasource_stixshifter/tests/test_stixshifter_diagnosis.py b/packages/kestrel_datasource_stixshifter/tests/test_stixshifter_diagnosis.py
index e406306b..a382cff0 100644
--- a/packages/kestrel_datasource_stixshifter/tests/test_stixshifter_diagnosis.py
+++ b/packages/kestrel_datasource_stixshifter/tests/test_stixshifter_diagnosis.py
@@ -31,6 +31,10 @@ def test_cli(stixshifter_profile_lab101):
 #### Kestrel specific config
 retrieval batch size: 2000
 cool down after transmission: 0
+allow unverified connector: False
+verify SSL certificate: True
+split query into subqueries: False
+subquery time window (in seconds): 0
 enable fast translation: False
 
 #### Config to be passed to stix-shifter
@@ -78,7 +82,7 @@
 """
 
     result = subprocess.run(
-        args=[STIX_SHIFTER_DIAG, "lab101"],
+        args=[STIX_SHIFTER_DIAG, "--start=2000-01-01T00:00:00.000Z", "--stop=3000-01-01T00:00:00.000Z", "lab101"],
         universal_newlines=True,
         stdout=subprocess.PIPE,
     )
@@ -98,6 +102,10 @@ def test_cli_ecs(stixshifter_profile_ecs):
 #### Kestrel specific config
 retrieval batch size: 2000
 cool down after transmission: 0
+allow unverified connector: False
+verify SSL certificate: True
+split query into subqueries: False
+subquery time window (in seconds): 0
 enable fast translation: False
 
 #### Config to be passed to stix-shifter
diff --git a/packages/kestrel_jupyter/pyproject.toml b/packages/kestrel_jupyter/pyproject.toml
index 1375304b..d54f4c0e 100644
--- a/packages/kestrel_jupyter/pyproject.toml
+++ b/packages/kestrel_jupyter/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "kestrel_jupyter"
-version = "1.8.4"
+version = "1.8.5"
 description = "Kestrel Jupyter Kernel"
 readme = "README.rst"
 requires-python = ">=3.8"
@@ -28,7 +28,7 @@ classifiers = [
 dependencies = [
     "kestrel_core==1.8.2",
     "kestrel_datasource_stixbundle==1.8.0",
-    "kestrel_datasource_stixshifter==1.8.2",
+    "kestrel_datasource_stixshifter==1.8.3",
     "kestrel_analytics_python==1.8.0",
     "kestrel_analytics_docker==1.8.1",
     "jupyterlab-server",