Skip to content

Commit

Permalink
Merge pull request #80 from equalitie/model_retrain_live_load
Browse files Browse the repository at this point in the history
Deliverable: Model with ability for dynamic updates
  • Loading branch information
mkaranasou authored Apr 15, 2021
2 parents e449f85 + a2d3bea commit a56fcca
Show file tree
Hide file tree
Showing 39 changed files with 1,415 additions and 392 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,5 @@ dmypy.json

# Pyre type checker
.pyre/

ip_cache/
6 changes: 3 additions & 3 deletions alembic/versions/88eb5854154f_add_id_group_in_request_sets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""add id_request_sets in request_sets
"""add uuid_request_set in request_sets
Revision ID: 88eb5854154f
Revises:
Expand All @@ -16,8 +16,8 @@


def upgrade():
op.add_column('request_sets', sa.Column('id_request_sets', sa.TEXT))
op.add_column('request_sets', sa.Column('uuid_request_set', sa.TEXT))


def downgrade():
op.op.drop_column('request_sets', 'id_request_sets')
op.op.drop_column('request_sets', 'uuid_request_set')
Binary file added data/Baskerville ER Diagram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
21 changes: 21 additions & 0 deletions data/samples/retrain_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"name": "RetrainSchema",
"properties": {
"timestamp": {
"type": "string",
"format": "date",
"pattern": "(\\d\\d\\d\\d-([0-2])?\\d-([0-3])?\\dT?([0-2])?\\d:([0-5])?\\d:([0-5])?\\d\\.\\d?\\d?\\d?Z?)"
},
"uuid": {
"type": "string"
},
"uuid_organization": {
"type": "string"
},
"training_config": {
"type": "string"
}
},
"required": ["timestamp", "uuid", "uuid_organization", "training_config"],
"additionalProperties": false
}
19 changes: 19 additions & 0 deletions data/samples/sample_feedback_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"name": "FeedbackSchema",
"properties": {
"id_context": {
"type": "string"
},
"uuid_organization": {
"type": "string"
},
"feedback_context": {
"type": "object"
},
"feedback": {
"type": "object"
}
},
"required": ["id_context", "uuid_organization", "feedback_context", "feedback"],
"additionalProperties": false
}
10 changes: 4 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
jinja2==2.10
pgpubsub
jinja2>=2.10.1
numpy==1.14.3
PyYAML==3.12
cryptography==2.2.2
Expand All @@ -8,28 +7,27 @@ python-geoip==1.2
python-geoip-geolite2==2015.303
certifi==2018.4.16
ua-parser==0.8.0
bokeh==0.12.16
# bokeh==0.12.16
pandas==0.23.0
pycountry==18.2.23
scipy==1.1.0
matplotlib==2.2.2
seaborn==0.8.1
hdbscan==0.8.13
alembic==1.0.8
enum34==1.1.6
tzwhere==3.0.3
pytz==2014.10
sqlalchemy_utils==0.33.3
pyspark==2.4.4
es_retriever==1.0.0
# es_retriever==1.0.0
psutil==5.4.6
psycopg2==2.7.5
yellowbrick==0.8
dateparser==0.7.0
pymisp==2.4.93
attrs==18.1.0
warlock==1.3.0
jsonschema==2.6
jsonschema==2.6.0
stringcase==1.2.0
prometheus_client==0.5.0
grafanalib==0.5.3
Expand Down
1 change: 0 additions & 1 deletion requirements_unit_tests.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
jinja2==2.10
pgpubsub
numpy==1.14.3
PyYAML==3.12
Expand Down
217 changes: 217 additions & 0 deletions src/baskerville/db/dashboard_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
# Copyright (c) 2020, eQualit.ie inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from baskerville.db import Base
from baskerville.db.models import utcnow, SerializableMixin
from sqlalchemy import Column, Integer, ForeignKey, DateTime, Enum, String, \
Boolean, BigInteger, Float, JSON, Text, TEXT
from sqlalchemy.orm import relationship
from passlib.apps import custom_app_context as pwd_context

from baskerville.util.enums import UserCategoryEnum, FeedbackEnum, \
FeedbackContextTypeEnum


class UserCategory(Base, SerializableMixin):
    """Category that dashboard users are assigned to (``user_categories``)."""

    __tablename__ = 'user_categories'

    id = Column(Integer, primary_key=True, autoincrement=True)
    category = Column(Enum(UserCategoryEnum))

    # One category is shared by many users: the foreign key lives on
    # ``User.id_category``, so this side of the relationship is a list.
    users = relationship('User', back_populates='category', uselist=True)


class Organization(Base, SerializableMixin):
    """Organization record, mapped to the ``organizations`` table."""

    __tablename__ = 'organizations'

    # Both ``id`` and ``uuid`` are flagged ``primary_key``, so the mapped
    # primary key is the composite (id, uuid).
    id = Column(BigInteger, primary_key=True, autoincrement=True, unique=True)
    uuid = Column(String(300), primary_key=True, unique=True)
    name = Column(String(200), index=True)
    details = Column(TEXT)
    registered = Column(Boolean, default=False)

    # created_at is filled in by the database; updated_at stays NULL until
    # the first UPDATE issued through the ORM.
    created_at = Column(DateTime(timezone=True), server_default=utcnow())
    updated_at = Column(DateTime(timezone=True), nullable=True, onupdate=utcnow())

    # NOTE(review): ``uselist=False`` exposes a single User here even though
    # the foreign key (User.id_organization) allows many users per
    # organization - confirm whether one user per organization is intended.
    users = relationship('User', back_populates='organization', uselist=False)


class User(Base, SerializableMixin):
    """Dashboard user account, mapped to the ``users`` table.

    Only the passlib hash of the password is stored (``password_hash``);
    use :meth:`hash_password` to set it and :meth:`verify_password` to
    check a candidate password against it.
    """

    __tablename__ = 'users'

    id = Column(BigInteger(), primary_key=True, autoincrement=True, unique=True)
    id_organization = Column(BigInteger(), ForeignKey('organizations.id'))
    id_category = Column(Integer, ForeignKey('user_categories.id'), nullable=False)
    username = Column(String(200), index=True)
    first_name = Column(String(200), index=True)
    last_name = Column(String(200), index=True)
    email = Column(String(256), unique=True, nullable=False)
    password_hash = Column(String(128))
    is_active = Column(Boolean())
    is_gitlab_login = Column(Boolean(), default=False)
    is_admin = Column(Boolean(), default=False)
    created_at = Column(DateTime(timezone=True), server_default=utcnow())
    updated_at = Column(
        DateTime(timezone=True), nullable=True, onupdate=utcnow()
    )

    # users * - 1 category
    category = relationship(
        'UserCategory',
        foreign_keys=id_category, back_populates='users'
    )
    organization = relationship(
        'Organization',
        foreign_keys=id_organization, back_populates='users'
    )
    # NOTE(review): plural name but ``uselist=False`` (scalar attribute) -
    # confirm the intended cardinality against the Runtime model.
    runtimes = relationship(
        'Runtime',
        uselist=False,
        # back_populates='user'
    )

    # presumably read by SerializableMixin to keep the hash out of
    # serialized output - verify against the mixin
    _remove = ['password_hash']

    def hash_password(self, password):
        """Hash ``password``, store it in ``password_hash`` and return it.

        :param password: the plain-text password to hash
        :return: the passlib hash string that was stored on the instance
        """
        # ``CryptContext.encrypt`` is a deprecated alias of ``hash`` since
        # passlib 1.7; call ``hash`` directly to avoid the DeprecationWarning.
        self.password_hash = pwd_context.hash(password)
        return self.password_hash

    def verify_password(self, password):
        """Return whether ``password`` matches the stored ``password_hash``.

        :param password: the plain-text password to check
        :return: the result of ``pwd_context.verify``
        """
        return pwd_context.verify(password, self.password_hash)


class FeedbackContext(Base, SerializableMixin):
    """Context that feedback rows are attached to (``feedback_contexts``).

    Holds the reason for the feedback, the time window it covers and
    free-text notes, scoped to an organization through its uuid.
    """

    __tablename__ = 'feedback_contexts'

    id = Column(BigInteger, primary_key=True, autoincrement=True, unique=True)
    # Plain string column: no ForeignKey to ``organizations`` is declared.
    uuid_organization = Column(String(300), nullable=False)
    reason = Column(Enum(FeedbackContextTypeEnum))
    reason_descr = Column(TEXT)
    start = Column(DateTime(timezone=True))
    stop = Column(DateTime(timezone=True))
    ip_count = Column(Integer)
    notes = Column(TEXT)
    progress_report = Column(TEXT)
    # New contexts are pending unless stated otherwise.
    pending = Column(Boolean, default=True)


class Feedback(Base, SerializableMixin):
    """Feedback given by a user on one request set (``feedback`` table).

    Each row belongs to a feedback context and a user, and carries a copy of
    the request set's prediction, score and features next to the feedback
    value itself.
    """

    __tablename__ = 'feedback'

    id = Column(BigInteger, primary_key=True, autoincrement=True, unique=True)
    id_feedback_context = Column(
        BigInteger, ForeignKey('feedback_contexts.id'), nullable=False
    )
    id_user = Column(BigInteger, ForeignKey('users.id'), nullable=False)
    # Joined to RequestSet by value only (see ``request_set`` below); no
    # ForeignKey constraint is declared on this column.
    uuid_request_set = Column(TEXT, nullable=False)
    prediction = Column(Integer, nullable=False)
    score = Column(Float, nullable=False)
    attack_prediction = Column(Float, nullable=False)
    low_rate = Column(Boolean, nullable=True)
    ip = Column(String, nullable=False)
    target = Column(String, nullable=False)
    features = Column(JSON, nullable=False)
    feedback = Column(Enum(FeedbackEnum))
    start = Column(DateTime(timezone=True), nullable=False)
    stop = Column(DateTime(timezone=True), nullable=False)
    submitted = Column(Boolean, default=False)
    created_at = Column(DateTime(timezone=True), server_default=utcnow())
    updated_at = Column(DateTime(timezone=True), nullable=True, onupdate=utcnow())

    user = relationship('User', foreign_keys=id_user)
    # Explicit join condition because the two tables are linked through the
    # uuid text column rather than a declared foreign key.
    request_set = relationship(
        'RequestSet',
        primaryjoin='foreign(Feedback.uuid_request_set) == remote(RequestSet.uuid_request_set)'
    )
    feedback_context = relationship(
        'FeedbackContext', foreign_keys=id_feedback_context
    )


class SubmittedFeedback(Base, SerializableMixin):
    """Feedback row as submitted by an organization (``submitted_feedback``).

    Keyed to an organization and a request set through their uuids instead of
    foreign keys, and to a feedback context through ``id_context``.
    """

    __tablename__ = 'submitted_feedback'

    id = Column(BigInteger, primary_key=True, autoincrement=True, unique=True)
    # not all feedback is part of an attack
    id_context = Column(
        BigInteger, ForeignKey('feedback_contexts.id'), nullable=False
    )
    uuid_organization = Column(String(300), nullable=False)
    uuid_request_set = Column(TEXT, nullable=False)
    prediction = Column(Integer, nullable=False)
    score = Column(Float, nullable=False)
    attack_prediction = Column(Float, nullable=False)
    low_rate = Column(Boolean, nullable=True)
    features = Column(JSON, nullable=True)
    feedback = Column(Enum(FeedbackEnum))
    start = Column(DateTime(timezone=True), nullable=True)
    stop = Column(DateTime(timezone=True), nullable=True)
    submitted_at = Column(DateTime(timezone=True))
    created_at = Column(DateTime(timezone=True), server_default=utcnow())
    updated_at = Column(DateTime(timezone=True), nullable=True, onupdate=utcnow())

    # Both relationships join on uuid values, so the join condition has to be
    # spelled out (no ForeignKey is declared on either uuid column).
    organization = relationship(
        'Organization',
        primaryjoin='foreign(SubmittedFeedback.uuid_organization) == remote(Organization.uuid)'
    )
    request_set = relationship(
        'RequestSet',
        primaryjoin='foreign(SubmittedFeedback.uuid_request_set) == remote(RequestSet.uuid_request_set)'
    )

    # Explicit list of column names kept on the class.
    # NOTE(review): 'stop' and 'created_at' are mapped above but not listed
    # here - confirm with the consumers of this list whether that is intended.
    columns = [
        'id',
        'id_context',
        'uuid_organization',
        'uuid_request_set',
        'prediction',
        'score',
        'attack_prediction',
        'low_rate',
        'features',
        'feedback',
        'start',
        'submitted_at',
        'updated_at',
    ]


class Message(Base, SerializableMixin):
    """Message addressed to an organization, optionally to one of its users.

    Mapped to the ``messages`` table.
    """

    __tablename__ = 'messages'

    id = Column(BigInteger, primary_key=True, autoincrement=True, unique=True)
    # Optional: a message is not required to reference a user.
    id_user = Column(BigInteger, ForeignKey('users.id'), nullable=True)
    uuid_organization = Column(String(300), nullable=False)
    message = Column(TEXT, nullable=False)
    severity = Column(String, nullable=False)
    created_at = Column(DateTime(timezone=True), server_default=utcnow())

    user = relationship('User', foreign_keys=id_user)
    # Joined on the organization uuid; no ForeignKey is declared for it, hence
    # the explicit join condition.
    organization = relationship(
        'Organization',
        primaryjoin='foreign(Message.uuid_organization) == remote(Organization.uuid)'
    )


class PendingWork(Base, SerializableMixin):
    """Unit of work started by a user, with its status (``pending_work``).

    A row starts out as ``pending=True`` / ``success=False``; ``logs`` is
    optional free text.
    """

    __tablename__ = 'pending_work'

    id = Column(BigInteger, primary_key=True, autoincrement=True, unique=True)
    id_user = Column(BigInteger(), ForeignKey('users.id'), nullable=False)
    uuid = Column(String(), nullable=False)
    description = Column(TEXT(), nullable=False)
    logs = Column(TEXT(), nullable=True)
    success = Column(Boolean(), nullable=False, default=False)
    pending = Column(Boolean(), nullable=False, default=True)
    created_at = Column(DateTime(timezone=True), server_default=utcnow())
    # The server default is kept so new rows still start with a timestamp;
    # ``onupdate`` refreshes it on every ORM UPDATE, matching the other
    # models in this module (previously the value was never updated).
    updated_at = Column(
        DateTime(timezone=True), server_default=utcnow(), onupdate=utcnow()
    )

    user = relationship(
        'User',
        foreign_keys=id_user
    )
Loading

0 comments on commit a56fcca

Please sign in to comment.