diff --git a/stix2/datastore/relational_db/relational_db.py b/stix2/datastore/relational_db/relational_db.py index 10f1c8c8..2aa722cc 100644 --- a/stix2/datastore/relational_db/relational_db.py +++ b/stix2/datastore/relational_db/relational_db.py @@ -1,4 +1,4 @@ -from sqlalchemy import MetaData, create_engine +from sqlalchemy import MetaData, create_engine, select from sqlalchemy.schema import CreateSchema, CreateTable, Sequence from stix2.base import _STIXBase @@ -6,23 +6,13 @@ from stix2.datastore.relational_db.input_creation import ( generate_insert_for_object, ) -from stix2.datastore.relational_db.table_creation import ( - create_core_tables, generate_object_table, +from stix2.datastore.relational_db.table_creation import create_table_objects +from stix2.datastore.relational_db.utils import ( + canonicalize_table_name, schema_for, table_name_for, ) -from stix2.datastore.relational_db.utils import canonicalize_table_name from stix2.parsing import parse -from stix2.v21.base import ( - _DomainObject, _Extension, _MetaObject, _Observable, _RelationshipObject, -) - - -def _get_all_subclasses(cls): - all_subclasses = [] - - for subclass in cls.__subclasses__(): - all_subclasses.append(subclass) - all_subclasses.extend(_get_all_subclasses(subclass)) - return all_subclasses +import stix2.registry +import stix2.utils def _add(store, stix_data, allow_custom=True, version="2.1"): @@ -63,82 +53,101 @@ def _add(store, stix_data, allow_custom=True, version="2.1"): class RelationalDBStore(DataStoreMixin): - """Interface to a file directory of STIX objects. - - FileSystemStore is a wrapper around a paired FileSystemSink - and FileSystemSource. - - Args: - stix_dir (str): path to directory of STIX objects - allow_custom (bool): whether to allow custom STIX content to be - pushed/retrieved. Defaults to True for FileSystemSource side - (retrieving data) and False for FileSystemSink - side(pushing data). 
def __init__(
    self, database_connection_url, allow_custom=True, version=None,
    instantiate_database=True, *stix_object_classes
):
    """
    Build a store whose source and sink share one engine and one set of
    table definitions.

    Args:
        database_connection_url: An SQLAlchemy URL referring to a database
        allow_custom: Whether custom content is allowed when processing
            dict content to be added to the store (forwarded to the sink)
        version: TODO: unused so far (forwarded to the sink as-is)
        instantiate_database: Whether tables, etc should be created in the
            database (only necessary the first time)
        *stix_object_classes: STIX object classes to map into table schemas
            (and ultimately database tables, if instantiation is desired).
            This can be used to limit which table schemas are created, if
            one is only working with a subset of STIX types.  If not given,
            auto-detect all classes and create table schemas for all of
            them.
    """
    engine = create_engine(database_connection_url)

    # Table definitions are generated once here and handed to both halves
    # of the store, so neither of them regenerates them.
    shared_metadata = MetaData()
    self.metadata = shared_metadata
    create_table_objects(shared_metadata, stix_object_classes)

    # The source is constructed before the sink, as in the keyword-argument
    # evaluation order of a single combined call.
    source = RelationalDBSource(engine, metadata=shared_metadata)
    sink = RelationalDBSink(
        engine,
        allow_custom=allow_custom,
        version=version,
        instantiate_database=instantiate_database,
        metadata=shared_metadata,
    )

    super().__init__(source=source, sink=sink)
def __init__(
    self, database_connection_or_url, allow_custom=True, version=None,
    instantiate_database=True, *stix_object_classes, metadata=None
):
    """
    Initialize this sink.  Only one of stix_object_classes and metadata
    should be given: if the latter is given, assume table schemas are
    already created.

    Args:
        database_connection_or_url: An SQLAlchemy engine object, or URL.
            A str is passed to create_engine(); anything else is used
            as the engine directly.
        allow_custom: Whether custom content is allowed when processing
            dict content to be added to the sink
        version: TODO: unused so far
        instantiate_database: Whether schemas and tables should be created
            in the database (only necessary the first time)
        *stix_object_classes: STIX object classes to map into table schemas
            (and ultimately database tables, if instantiation is desired).
            This can be used to limit which table schemas are created, if
            one is only working with a subset of STIX types.  If not given,
            auto-detect all classes and create table schemas for all of
            them.  If metadata is given, the table data therein is used and
            this argument is ignored.
        metadata: SQLAlchemy MetaData object containing table information.
            Only applicable when this class is instantiated via a store,
            so that table information can be constructed once and shared
            between source and sink.
    """
    super().__init__()

    if isinstance(database_connection_or_url, str):
        self.database_connection = create_engine(database_connection_or_url)
    else:
        self.database_connection = database_connection_or_url

    # Test identity, not truthiness: the question is "was a MetaData
    # supplied?", which must not depend on how the object evaluates as a
    # boolean.
    if metadata is not None:
        self.metadata = metadata
    else:
        self.metadata = MetaData()
        create_table_objects(self.metadata, stix_object_classes)

    self.allow_custom = allow_custom

    # Lookup of Table objects keyed by their canonicalized
    # (schema-qualified) name.
    self.tables_dictionary = {
        canonicalize_table_name(t.name, t.schema): t
        for t in self.metadata.tables.values()
    }

    if instantiate_database:
        # Schemas must exist before the tables that live in them.
        self._create_schemas()
        self._instantiate_database()
# --- RelationalDBSink methods (the class header lies outside this span) ---

def _instantiate_database(self):
    """Register the general-purpose sequence and create all tables."""
    # NOTE(review): self.sequence is only assigned here, so a sink built
    # with instantiate_database=False never gets this attribute -- confirm
    # nothing else reads it.
    self.sequence = Sequence("my_general_seq", metadata=self.metadata, start=1)
    self.metadata.create_all(self.database_connection)


def generate_stix_schema(self):
    """Print the CREATE TABLE statement for every table in self.metadata."""
    for t in self.metadata.tables.values():
        print(CreateTable(t).compile(self.database_connection))
        print()


# --- RelationalDBSource ---

class RelationalDBSource(DataSource):
    def __init__(
        self, database_connection_or_url, *stix_object_classes, metadata=None
    ):
        """
        Initialize this source.  Only one of stix_object_classes and metadata
        should be given: if the latter is given, assume table schemas are
        already created.  Instances of this class do not create the actual
        database tables; see the store/sink for that.

        Args:
            database_connection_or_url: An SQLAlchemy engine object, or URL.
                A str is passed to create_engine(); anything else is used
                as the engine directly.
            *stix_object_classes: STIX object classes to map into table
                schemas.  This can be used to limit which schemas are
                created, if one is only working with a subset of STIX types.
                If not given, auto-detect all classes and create schemas for
                all of them.  If metadata is given, the table data therein is
                used and this argument is ignored.
            metadata: SQLAlchemy MetaData object containing table
                information.  Only applicable when this class is instantiated
                via a store, so that table information can be constructed
                once and shared between source and sink.
        """
        super().__init__()

        if isinstance(database_connection_or_url, str):
            self.database_connection = create_engine(database_connection_or_url)
        else:
            self.database_connection = database_connection_or_url

        # Identity test: "was a MetaData supplied?" must not depend on the
        # object's truthiness.
        if metadata is not None:
            self.metadata = metadata
        else:
            self.metadata = MetaData()
            create_table_objects(self.metadata, stix_object_classes)

    def get(self, stix_id, version=None, _composite_filters=None):
        """
        Fetch one object by ID from its core table joined to its
        type-specific table.

        Args:
            stix_id: ID of the object to retrieve; its type prefix selects
                the tables to query.
            version: Not used by this implementation.
            _composite_filters: Not used by this implementation.

        Returns:
            An instance of the class registered for the ID's type, built
            from the matching row with allow_custom=True, or None if no row
            matches.

        Raises:
            KeyError: if the core or type-specific table is not present in
                this source's metadata.
        """
        stix_type = stix2.utils.get_type_from_id(stix_id)
        stix_class = stix2.registry.class_for_type(
            # TODO: give user control over STIX version used?
            stix_type, stix_version=stix2.DEFAULT_VERSION,
        )

        # Info about the type-specific table
        type_table_name = table_name_for(stix_type)
        type_schema_name = schema_for(stix_class)
        type_table = self.metadata.tables[f"{type_schema_name}.{type_table_name}"]

        # Some fixed info about core tables
        if type_schema_name == "sco":
            core_table_name = "common.core_sco"
        else:
            # for SROs and SMOs too?
            core_table_name = "common.core_sdo"

        core_table = self.metadata.tables[core_table_name]

        # Both core and type-specific tables have "id"; let's not duplicate
        # that in the result set columns.  Is there a better way to do this?
        type_cols_except_id = (
            col for col in type_table.c if col.key != "id"
        )

        # No explicit ON clause is given to join().
        # NOTE(review): presumably this relies on a foreign key between the
        # type table and the core table -- confirm in table_creation.
        core_type_select = (
            select(core_table, *type_cols_except_id)
            .join(type_table)
            .where(core_table.c.id == stix_id)
        )

        with self.database_connection.begin() as conn:
            # Should be at most one matching row
            row = conn.execute(core_type_select).mappings().first()
            # first() yields None when nothing matched; feeding that to
            # dict.update() would raise TypeError, so report "not found"
            # explicitly instead.
            if row is None:
                return None
            obj_dict = dict(row)

        return stix_class(**obj_dict, allow_custom=True)

    def all_versions(self, stix_id, version=None, _composite_filters=None):
        # Not implemented yet.
        pass


# --- relational_db_testing.py: script entry point ---

def main():
    """Create a Directory-only store, add one object and read it back."""
    store = RelationalDBStore(
        "postgresql://localhost/stix-data-sink",
        False,   # allow_custom
        None,    # version
        True,    # instantiate_database
        stix2.Directory,
    )
    store.sink.generate_stix_schema()

    store.add(directory_stix_object)

    read_obj = store.get(directory_stix_object.id)
    print(read_obj)
def create_table_objects(metadata, stix_object_classes):
    """
    Populate ``metadata`` with the core tables plus one set of tables per
    STIX class, and return the resulting list of tables.

    Args:
        metadata: SQLAlchemy MetaData object the tables are attached to.
        stix_object_classes: Classes (or nested iterables of classes) to
            generate tables for.  If empty, the classes are discovered
            automatically via get_stix_object_classes().

    Returns:
        The list produced by create_core_tables(), extended with the tables
        generated for each class.
    """
    # Explicit classes may be given singly or as (nested) lists, so flatten
    # them; with nothing given, fall back to automatic discovery.
    classes_to_map = (
        flat_classes(stix_object_classes)
        if stix_object_classes
        else get_stix_object_classes()
    )

    all_tables = create_core_tables(metadata)

    for stix_class in classes_to_map:
        all_tables.extend(
            generate_object_table(
                stix_class,
                metadata,
                schema_for(stix_class),
                is_extension=issubclass(stix_class, _Extension),
            ),
        )

    return all_tables
def _get_all_subclasses(cls):
    """
    Return every direct and indirect subclass of ``cls`` as a list.

    Each subclass is followed immediately by its own descendants.  A class
    reachable through more than one parent appears once per path.
    """
    all_subclasses = []

    for subclass in cls.__subclasses__():
        all_subclasses.append(subclass)
        all_subclasses.extend(_get_all_subclasses(subclass))
    return all_subclasses


def get_stix_object_classes():
    """
    Yield the STIX classes that table schemas are generated for by default:
    all subclasses of the domain, relationship, observable and meta object
    bases, followed by the property-style extension classes.
    """
    yield from _get_all_subclasses(_DomainObject)
    yield from _get_all_subclasses(_RelationshipObject)
    yield from _get_all_subclasses(_Observable)
    yield from _get_all_subclasses(_MetaObject)
    # Non-object extensions (property or toplevel-property only)
    # NOTE(review): the code this replaced used a deny-list (anything whose
    # extension_type was not "new-sdo"/"new-sco"/"new-sro"), which also let
    # through extension classes with any other extension_type value.
    # Confirm the narrower allow-list below is intended.
    for ext_cls in _get_all_subclasses(_Extension):
        if ext_cls.extension_type in (
            "property-extension", "toplevel-property-extension",
        ):
            yield ext_cls


def schema_for(stix_class):
    """
    Return the database schema name for a STIX class, or None if the class
    derives from none of the known bases.

    The checks run in a fixed order and the first match wins.  Extension
    classes use their ``_applies_to`` attribute, defaulting to "sco".
    """
    if issubclass(stix_class, _DomainObject):
        return "sdo"
    if issubclass(stix_class, _RelationshipObject):
        return "sro"
    if issubclass(stix_class, _Observable):
        return "sco"
    if issubclass(stix_class, _MetaObject):
        return "common"
    if issubclass(stix_class, _Extension):
        return getattr(stix_class, "_applies_to", "sco")
    return None


def table_name_for(stix_type_or_class):
    """
    Return the canonical table name for a STIX type string or class.

    For a class, the ``_type`` attribute is used, falling back to the class
    name when it is absent.
    """
    if isinstance(stix_type_or_class, str):
        table_name = stix_type_or_class
    else:
        # A _STIXBase subclass
        table_name = getattr(stix_type_or_class, "_type", stix_type_or_class.__name__)

    # Applies to registered extension-definition style extensions only.
    # Their "_type" attribute is actually set to the extension definition ID,
    # rather than a STIX type.  Keep only the first 30 characters: the
    # 22-character "extension-definition--" prefix plus 8 more (presumably
    # the first group of the UUID).
    if table_name.startswith("extension-definition"):
        table_name = table_name[0:30]

    return canonicalize_table_name(table_name)


def flat_classes(class_or_classes):
    """
    Yield the leaves of an arbitrarily nested iterable of classes.

    A non-iterable argument is yielded as-is.  Mappings are treated as
    leaves, because STIX objects are iterable mappings that must not be
    descended into.  Strings are treated as leaves too: a one-character
    string iterates to itself, so descending into one never terminates.
    """
    is_leaf = (
        isinstance(class_or_classes, (str, Mapping))
        or not isinstance(class_or_classes, Iterable)
    )
    if is_leaf:
        yield class_or_classes
        return

    for class_ in class_or_classes:
        yield from flat_classes(class_)