From a6db5b2b11ddf7a0c04415604ec05b1fa858dc10 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 18 Aug 2016 03:03:20 +0300 Subject: [PATCH 01/68] [WIP] implement match backend Signed-off-by: Nir Izraeli --- server/collab/models.py | 46 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/server/collab/models.py b/server/collab/models.py index 491387800..d20725fec 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -51,6 +51,8 @@ class Instance(models.Model): type = models.CharField(max_length=16, choices=TYPE_CHOICES) offset = models.BigIntegerField() + matches = models.ManyToManyField('Instance', symmetrical=True, through=Match) + def __unicode__(self): return "{} instance {} at {}".format(self.get_type_display(), self.offset, self.file.name) @@ -83,6 +85,50 @@ def __unicode__(self): __str__ = __unicode__ +class Task(models.Model): + STATUS_PENDING = 'pending' + STATUS_STARTING = 'starting' + STATUS_POPULATING = 'populating' + STATUS_MATCHING = 'matching' + STATUS_FINISHING = 'finishing' + STATUS_DONE = 'done' + STATUS_CHOICES = ((STATUS_PENDING, "Pending in Queue..."), + (STATUS_STARTING, "Started"), + ('-' + STATUS_STARTING, "Failed Starting"), + (STATUS_POPULATING, "Collecting Data..."), + ('-' + STATUS_POPULATING, "Failed Collecting Data..."), + (STATUS_MATCHING, "Comparing Elements..."), + ('-' + STATUS_MATCHING, "Failed Comparing Elements..."), + (STATUS_FINISHING, "Handling New Elements..."), + ('-' + STATUS_FINISHING, "Failed Handling New Elements..."), + (STATUS_DONE, "Done!"), + ('-' + STATUS_DONE, "General Failure")) + ACTION_COMMIT = "commit" + ACTION_MATCH = "match" + ACTION_UPDATE = "update" + ACTION_CLUSTER = "cluster" + ACTION_CHOICES = ((ACTION_COMMIT, "Commit"), + (ACTION_MATCH, "Match"), + (ACTION_UPDATE, "Update"), + (ACTION_CLUSTER, "Cluster")) + + # store matched objects + created = models.DateTimeField(auto_now_add=True) + started = models.DateTimeField() + finished = 
models.DateTimeField() + + owner = models.ForeignKey(User, db_index=True) + status = models.CharField(max_length=16, choices=STATUS_CHOICES) + action = models.CharField(max_length=16, choices=ACTION_CHOICES) + + +class Match(models.Model): + task = models.ForeignKey(Task, db_index=True, related_name='matches') + + instance_source = models.ForeignKey(Instance) + instance_target = models.ForeignKey(Instance) + + # # Anotations # From bd3f58ce07b9fb2670323565dd211845f381d0a0 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 25 Aug 2016 03:00:29 +0300 Subject: [PATCH 02/68] add celery and empty tasks file Signed-off-by: Nir Izraeli --- server/collab/tasks.py | 8 ++++++++ server/rematch/__init__.py | 5 +++++ server/rematch/celery.py | 17 +++++++++++++++++ server/rematch/settings.py | 9 +++++++++ server/requirements.txt | 2 ++ 5 files changed, 41 insertions(+) create mode 100644 server/collab/tasks.py create mode 100644 server/rematch/celery.py diff --git a/server/collab/tasks.py b/server/collab/tasks.py new file mode 100644 index 000000000..98f10e32a --- /dev/null +++ b/server/collab/tasks.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import + +from celery import shared_task + + +@shared_task +def match(file_id): + pass diff --git a/server/rematch/__init__.py b/server/rematch/__init__.py index e69de29bb..d069445b1 100644 --- a/server/rematch/__init__.py +++ b/server/rematch/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import + +# This will make sure the app is always imported when +# Django starts so that shared_task will use this app. +from .celery import app as celery_app # NOQA: F401 diff --git a/server/rematch/celery.py b/server/rematch/celery.py new file mode 100644 index 000000000..8d4c837b2 --- /dev/null +++ b/server/rematch/celery.py @@ -0,0 +1,17 @@ +from __future__ import absolute_import + +import os + +from celery import Celery + +# set the default Django settings module for the 'celery' program. 
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rematch.settings') + +from django.conf import settings # noqa + +app = Celery('rematch') + +# Using a string here means the worker will not have to +# pickle the object when using Windows. +app.config_from_object('django.conf:settings') +app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) diff --git a/server/rematch/settings.py b/server/rematch/settings.py index e5a0d6b71..9f9c72a85 100644 --- a/server/rematch/settings.py +++ b/server/rematch/settings.py @@ -46,6 +46,7 @@ 'rest_framework', 'rest_framework.authtoken', 'rest_auth', + 'djcelery', 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', @@ -150,3 +151,11 @@ # https://docs.djangoproject.com/en/1.9/howto/static-files/ STATIC_URL = '/static/' + + +# Celery configuration + +CELERY_RESULT_BACKEND = 'djcelery.backends.database:DatabaseBackend' +CELERY_ACCEPT_CONTENT = ['json'] +CELERY_TASK_SERIALIZER = 'json' +CELERY_RESULT_SERIALIZER = 'json' diff --git a/server/requirements.txt b/server/requirements.txt index 01b0eeaaf..a1b083ef3 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -4,3 +4,5 @@ django-registration-redux djangorestframework django-rest-auth django-allauth +celery +django-celery From a878d5d79723b318bd9c47fcd74738e5b4ba3feb Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 25 Aug 2016 16:37:46 +0300 Subject: [PATCH 03/68] Try my multi-table inheritence idea See http://stackoverflow.com/questions/39146334 for implementation details and community scrutiny Signed-off-by: Nir Izraeli --- server/collab/models.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/server/collab/models.py b/server/collab/models.py index d20725fec..7e1213ef4 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -51,7 +51,7 @@ class Instance(models.Model): type = models.CharField(max_length=16, choices=TYPE_CHOICES) offset = models.BigIntegerField() - matches = 
models.ManyToManyField('Instance', symmetrical=True, through=Match) + matches = models.ManyToManyField('self', symmetrical=True) def __unicode__(self): return "{} instance {} at {}".format(self.get_type_display(), self.offset, @@ -96,11 +96,11 @@ class Task(models.Model): (STATUS_STARTING, "Started"), ('-' + STATUS_STARTING, "Failed Starting"), (STATUS_POPULATING, "Collecting Data..."), - ('-' + STATUS_POPULATING, "Failed Collecting Data..."), + ('-' + STATUS_POPULATING, "Failed Collecting Data"), (STATUS_MATCHING, "Comparing Elements..."), - ('-' + STATUS_MATCHING, "Failed Comparing Elements..."), + ('-' + STATUS_MATCHING, "Failed Comparing Elements"), (STATUS_FINISHING, "Handling New Elements..."), - ('-' + STATUS_FINISHING, "Failed Handling New Elements..."), + ('-' + STATUS_FINISHING, "Failed Handling New Elements"), (STATUS_DONE, "Done!"), ('-' + STATUS_DONE, "General Failure")) ACTION_COMMIT = "commit" @@ -122,12 +122,9 @@ class Task(models.Model): action = models.CharField(max_length=16, choices=ACTION_CHOICES) -class Match(models.Model): +class Match(Instance.matches.through()): task = models.ForeignKey(Task, db_index=True, related_name='matches') - instance_source = models.ForeignKey(Instance) - instance_target = models.ForeignKey(Instance) - # # Anotations From 09db5cd3dbd2581f9f20c9fa79c89424ec3509b8 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 25 Aug 2016 18:27:32 +0300 Subject: [PATCH 04/68] Merge Task model with async tasks and create proper views Add basic statuses for Tasks, simple update in match task and serializer exposure Make Task.project optional, defaulting to file.project when available make task id a uuid field and make it an additional field instead of the primary key Signed-off-by: Nir Izraeli --- server/collab/models.py | 29 +++++++++++++---------------- server/collab/serializers.py | 18 +++++++++++++++++- server/collab/tasks.py | 15 ++++++++++++--- server/collab/urls.py | 1 + server/collab/views.py | 30 
+++++++++++++++++++++++++++--- 5 files changed, 70 insertions(+), 23 deletions(-) diff --git a/server/collab/models.py b/server/collab/models.py index 7e1213ef4..a65cd8d51 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -87,22 +87,13 @@ def __unicode__(self): class Task(models.Model): STATUS_PENDING = 'pending' - STATUS_STARTING = 'starting' - STATUS_POPULATING = 'populating' - STATUS_MATCHING = 'matching' - STATUS_FINISHING = 'finishing' + STATUS_STARTED = 'started' STATUS_DONE = 'done' + STATUS_FAILED = 'failed' STATUS_CHOICES = ((STATUS_PENDING, "Pending in Queue..."), - (STATUS_STARTING, "Started"), - ('-' + STATUS_STARTING, "Failed Starting"), - (STATUS_POPULATING, "Collecting Data..."), - ('-' + STATUS_POPULATING, "Failed Collecting Data"), - (STATUS_MATCHING, "Comparing Elements..."), - ('-' + STATUS_MATCHING, "Failed Comparing Elements"), - (STATUS_FINISHING, "Handling New Elements..."), - ('-' + STATUS_FINISHING, "Failed Handling New Elements"), + (STATUS_STARTED, "Started"), (STATUS_DONE, "Done!"), - ('-' + STATUS_DONE, "General Failure")) + (STATUS_FAILED, "Failure")) ACTION_COMMIT = "commit" ACTION_MATCH = "match" ACTION_UPDATE = "update" @@ -112,15 +103,21 @@ class Task(models.Model): (ACTION_UPDATE, "Update"), (ACTION_CLUSTER, "Cluster")) + # TODO: to uuid field + task_id = models.UUIDField(db_index=True, unique=True, editable=False) + # store matched objects created = models.DateTimeField(auto_now_add=True) - started = models.DateTimeField() - finished = models.DateTimeField() + finished = models.DateTimeField(null=True) owner = models.ForeignKey(User, db_index=True) - status = models.CharField(max_length=16, choices=STATUS_CHOICES) + status = models.CharField(default=STATUS_PENDING, max_length=16, + choices=STATUS_CHOICES) action = models.CharField(max_length=16, choices=ACTION_CHOICES) + project = models.ForeignKey(Project, related_name='tasks') + file = models.ForeignKey(File, related_name='tasks') + class 
Match(Instance.matches.through()): task = models.ForeignKey(Task, db_index=True, related_name='matches') diff --git a/server/collab/serializers.py b/server/collab/serializers.py index d59fa2be9..2ef450477 100644 --- a/server/collab/serializers.py +++ b/server/collab/serializers.py @@ -1,5 +1,5 @@ from rest_framework import serializers -from collab.models import Project, File, Instance, Vector +from collab.models import Project, File, Task, Instance, Vector class ProjectSerializer(serializers.ModelSerializer): @@ -22,6 +22,22 @@ class Meta: 'md5hash', 'file', 'instances') +class TaskSerializer(serializers.ModelSerializer): + owner = serializers.ReadOnlyField(source='owner.username') + task_id = serializers.ReadOnlyField() + created = serializers.ReadOnlyField() + finished = serializers.ReadOnlyField() + status = serializers.ReadOnlyField() + + project = serializers.PrimaryKeyRelatedField(queryset=Project.objects.all(), + allow_null=True) + + class Meta: + model = Task + fields = ('id', 'task_id', 'created', 'finished', 'owner', 'status', + 'action', 'project', 'file') + + class InstanceSerializer(serializers.ModelSerializer): class NestedVectorSerializer(serializers.ModelSerializer): class Meta: diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 98f10e32a..67daa6716 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -1,8 +1,17 @@ -from __future__ import absolute_import +from django.utils.timezone import now +from models import Task from celery import shared_task @shared_task -def match(file_id): - pass +def match(file_id, project_id): + task = Task.objects.filter(task_id=match.request.id) + + # recording the task has started + task.update(status=Task.STATUS_STARTED) + + print("Running task {}".format(match.request.id)) + + # TODO: finished=now + task.update(status=Task.STATUS_DONE, finished=now()) diff --git a/server/collab/urls.py b/server/collab/urls.py index d7ba022d7..b55c86e63 100644 --- a/server/collab/urls.py +++ 
b/server/collab/urls.py @@ -6,6 +6,7 @@ router = DefaultRouter() router.register(r'projects', views.ProjectViewSet) router.register(r'files', views.FileViewSet) +router.register(r'tasks', views.TaskViewSet) router.register(r'instances', views.InstanceViewSet) router.register(r'vectors', views.VectorViewSet) diff --git a/server/collab/views.py b/server/collab/views.py index 2ddda9b4e..56ed20cb4 100644 --- a/server/collab/views.py +++ b/server/collab/views.py @@ -1,8 +1,10 @@ -from rest_framework import viewsets, permissions -from collab.models import Project, File, Instance, Vector +from rest_framework import viewsets, permissions, mixins +from collab.models import Project, File, Task, Instance, Vector from collab.serializers import (ProjectSerializer, FileSerializer, - InstanceSerializer, VectorSerializer) + TaskSerializer, InstanceSerializer, + VectorSerializer) from collab.permissions import IsOwnerOrReadOnly +from collab import tasks class ViewSetOwnerMixin(object): @@ -34,6 +36,28 @@ class FileViewSet(ViewSetOwnerMixin, viewsets.ModelViewSet): serializer_class = FileSerializer +class TaskViewSet(mixins.CreateModelMixin, mixins.RetrieveModelMixin, + mixins.DestroyModelMixin, mixins.ListModelMixin, + viewsets.GenericViewSet): + queryset = Task.objects.all() + serializer_class = TaskSerializer + permission_classes = (permissions.IsAuthenticatedOrReadOnly, + IsOwnerOrReadOnly) + + def perform_create(self, serializer): + if not serializer.validated_data['project']: + project = serializer.validated_data['file'].project + serializer.validated_data['project'] = project + + # if no project, let serializer.save fail on none project + if serializer.validated_data['project']: + result = tasks.match.delay(serializer.validated_data['file'].id, + serializer.validated_data['project'].id) + serializer.save(owner=self.request.user, task_id=result.id) + else: + serializer.save(owner=self.request.user, task_id='') + + class InstanceViewSet(ViewSetManyAllowedMixin, 
ViewSetOwnerMixin, viewsets.ModelViewSet): queryset = Instance.objects.all() From ad24f3ae703dfd81a76420ee4b5e71b31ccea757 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 26 Aug 2016 10:28:58 +0300 Subject: [PATCH 05/68] Create task when function data upload is done Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 45d4bf695..3ecdef1e6 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -14,29 +14,32 @@ class MatchAllAction(base.BoundFileAction): group = "Match" def activate(self, ctx): - self.file_id = netnode.bound_file_id - self.function_gen = enumerate(Functions()) - pd = QtWidgets.QProgressDialog(labelText="Processing functions...", + pd = QtWidgets.QProgressDialog(labelText="Processing...\nYou may continue " + "working but avoid ground-" + "breaking changes.", minimum=0, maximum=len(list(Functions()))) self.progress = pd self.progress.canceled.connect(self.cancel) + self.timer = QtCore.QTimer() - self.timer.timeout.connect(self.perform) + self.timer.timeout.connect(self.perform_upload) self.timer.start() - def perform(self): + self.progress.accepted.connect(self.accepted_upload) + + def perform_upload(self): try: i, offset = self.function_gen.next() - func = instances.FunctionInstance(self.file_id, offset) + func = instances.FunctionInstance(netnode.bound_file_id, offset) network.query("POST", "collab/instances/", params=func.serialize(), json=True) i = i + 1 self.progress.setValue(i) if (i >= self.progress.maximum()): - self.timer.stop() + self.progress.accept() except: self.timer.stop() raise @@ -44,7 +47,18 @@ def perform(self): def cancel(self): self.timer.stop() + def accepted_upload(self): + self.timer.stop() + self.timer.disconnect() + + # TODO: ask for project to compare against + task_params = {'action': 'commit', 
'file': netnode.bound_file_id, + 'project': None} + r = network.query("POST", "collab/tasks/", params=task_params, json=True) + print(r) + +# TODO: inherit logic in MatchAllAction class MatchFunctionAction(base.BoundFileAction): name = "Match &Function" group = "Match" From 7eb3470362868bfea012007bed86e47b740db773 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 26 Aug 2016 22:57:06 +0300 Subject: [PATCH 06/68] Create base MatchAction class both MatchAll and MatchFunction inherit Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 45 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 3ecdef1e6..cca48352b 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -1,6 +1,6 @@ import idaapi import idc -from idautils import Functions +import idautils from ..idasix import QtCore, QtWidgets @@ -9,16 +9,17 @@ from . import base -class MatchAllAction(base.BoundFileAction): - name = "&Match all" - group = "Match" - +class MatchAction(base.BoundFileAction): def activate(self, ctx): - self.function_gen = enumerate(Functions()) + function_gen = self.get_functions() + if not function_gen: + return + + self.function_gen = enumerate(function_gen) pd = QtWidgets.QProgressDialog(labelText="Processing...\nYou may continue " "working but avoid ground-" "breaking changes.", - minimum=0, maximum=len(list(Functions()))) + maximum=self.get_functions_count()) self.progress = pd self.progress.canceled.connect(self.cancel) @@ -58,20 +59,24 @@ def accepted_upload(self): print(r) -# TODO: inherit logic in MatchAllAction -class MatchFunctionAction(base.BoundFileAction): - name = "Match &Function" +class MatchAllAction(MatchAction): + name = "&Match all" group = "Match" - @staticmethod - def activate(ctx): - file_id = netnode.bound_file_id + def get_functions(self): + return idautils.Functions() - function = 
idaapi.choose_func("Choose function to match with database", - idc.ScreenEA()) - if function is None: - return + def get_functions_count(self): + return len(set(self.get_functions)) + + +class MatchFunctionAction(MatchAction): + name = "Match &Function" + group = "Match" + + def get_functions(self): + return idaapi.choose_func("Choose function to match with database", + idc.ScreenEA()) - data = instances.FunctionInstance(file_id, function.startEA) - network.query("POST", "collab/instances/", params=data.serialize(), - json=True) + def get_functions_count(self): + return 1 From ccf5a83a8e16e91509de9ebfaebfe79b3cd9d7a3 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sat, 27 Aug 2016 03:11:47 +0300 Subject: [PATCH 07/68] call get_functions() in MatchAllAction.get_functions_count plus turn methods into staticmethods and classmethods Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index cca48352b..af821fdff 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -63,20 +63,24 @@ class MatchAllAction(MatchAction): name = "&Match all" group = "Match" - def get_functions(self): + @staticmethod + def get_functions(): return idautils.Functions() - def get_functions_count(self): - return len(set(self.get_functions)) + @classmethod + def get_functions_count(cls): + return len(set(cls.get_functions())) class MatchFunctionAction(MatchAction): name = "Match &Function" group = "Match" - def get_functions(self): + @staticmethod + def get_functions(): return idaapi.choose_func("Choose function to match with database", idc.ScreenEA()) - def get_functions_count(self): + @staticmethod + def get_functions_count(): return 1 From 34a42804ab9b7c576f32a0ac06a8a5665b5aeaa7 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sat, 27 Aug 2016 13:57:00 +0300 Subject: [PATCH 08/68] initialize 
MatchAction params in __init__ and remove unnecessary parenthesis Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index af821fdff..e8120ec53 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -10,6 +10,12 @@ class MatchAction(base.BoundFileAction): + def __init__(self, *args, **kwargs): + super(MatchAction, self).__init__(*args, **kwargs) + self.function_gen = None + self.pbar = None + self.timer = QtCore.QTimer() + def activate(self, ctx): function_gen = self.get_functions() if not function_gen: @@ -20,14 +26,13 @@ def activate(self, ctx): "working but avoid ground-" "breaking changes.", maximum=self.get_functions_count()) - self.progress = pd - self.progress.canceled.connect(self.cancel) + self.pbar = pd + self.pbar.canceled.connect(self.cancel) - self.timer = QtCore.QTimer() self.timer.timeout.connect(self.perform_upload) self.timer.start() - self.progress.accepted.connect(self.accepted_upload) + self.pbar.accepted.connect(self.accepted_upload) def perform_upload(self): try: @@ -38,9 +43,9 @@ def perform_upload(self): json=True) i = i + 1 - self.progress.setValue(i) - if (i >= self.progress.maximum()): - self.progress.accept() + self.pbar.setValue(i) + if i >= self.pbar.maximum(): + self.pbar.accept() except: self.timer.stop() raise From 36126ae3e81bddd9a7979642694a3467a56296e0 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sun, 28 Aug 2016 20:19:16 +0300 Subject: [PATCH 09/68] add match dialog * match dialog fixes and connections * Add BaseDialog radio group utility and use that in MatchDialog * Also make MatchDialog functional * Merge match all and match function actions to match Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 40 +++------- idaplugin/rematch/dialogs/__init__.py | 3 +- idaplugin/rematch/dialogs/base.py | 29 
+++++++ idaplugin/rematch/dialogs/match.py | 74 ++++++++++++++++++ .../images/{match_all.png => match.png} | Bin idaplugin/rematch/images/match_function.png | Bin 1224 -> 0 bytes idaplugin/rematch/plugin.py | 3 +- 7 files changed, 118 insertions(+), 31 deletions(-) create mode 100755 idaplugin/rematch/dialogs/match.py rename idaplugin/rematch/images/{match_all.png => match.png} (100%) delete mode 100755 idaplugin/rematch/images/match_function.png diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index e8120ec53..413092a32 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -4,19 +4,30 @@ from ..idasix import QtCore, QtWidgets +from ..dialogs.match import MatchDialog + from .. import instances from .. import network, netnode from . import base class MatchAction(base.BoundFileAction): + name = "&Match" + def __init__(self, *args, **kwargs): super(MatchAction, self).__init__(*args, **kwargs) self.function_gen = None self.pbar = None self.timer = QtCore.QTimer() + self.source = None + self.target = None + self.methods = [] def activate(self, ctx): + dialog = MatchDialog() + self.source, self.target, self.methods = dialog.get() + print(self.source, self.target, self.methods) + function_gen = self.get_functions() if not function_gen: return @@ -61,31 +72,4 @@ def accepted_upload(self): task_params = {'action': 'commit', 'file': netnode.bound_file_id, 'project': None} r = network.query("POST", "collab/tasks/", params=task_params, json=True) - print(r) - - -class MatchAllAction(MatchAction): - name = "&Match all" - group = "Match" - - @staticmethod - def get_functions(): - return idautils.Functions() - - @classmethod - def get_functions_count(cls): - return len(set(cls.get_functions())) - - -class MatchFunctionAction(MatchAction): - name = "Match &Function" - group = "Match" - - @staticmethod - def get_functions(): - return idaapi.choose_func("Choose function to match with database", - 
idc.ScreenEA()) - - @staticmethod - def get_functions_count(): - return 1 + print(r) \ No newline at end of file diff --git a/idaplugin/rematch/dialogs/__init__.py b/idaplugin/rematch/dialogs/__init__.py index 47eac7e4b..5d9fba8de 100755 --- a/idaplugin/rematch/dialogs/__init__.py +++ b/idaplugin/rematch/dialogs/__init__.py @@ -1,6 +1,7 @@ import base import login +import match import project import settings -__all__ = [base, login, project, settings] +__all__ = [base, login, match, project, settings] diff --git a/idaplugin/rematch/dialogs/base.py b/idaplugin/rematch/dialogs/base.py index 9e62793db..57d3f2306 100755 --- a/idaplugin/rematch/dialogs/base.py +++ b/idaplugin/rematch/dialogs/base.py @@ -15,10 +15,39 @@ def __init__(self, title="", reject_handler=None, submit_handler=None, self.exception_handler = exception_handler self.response = None self.statusLbl = None + self.radio_groups = {} self.base_layout = QtWidgets.QVBoxLayout() self.setLayout(self.base_layout) + def add_radio_group(self, title, *radios, **kwargs): + radiogroup = QtWidgets.QButtonGroup() + groupbox = QtWidgets.QGroupBox(title) + layout = QtWidgets.QVBoxLayout() + checked = kwargs.pop('checked', None) + + self.radio_groups[radiogroup] = [] + for i, radio in enumerate(radios): + radio_name, id = radio + radio_widget = QtWidgets.QRadioButton(radio_name) + + # if checked is supplied, set correct radio as checked + # else set first radio as checked` + if (checked is None and i == 0) or checked == id: + radio_widget.setChecked(True) + + radiogroup.addButton(radio_widget, i) + layout.addWidget(radio_widget) + self.radio_groups[radiogroup].append(id) + groupbox.setLayout(layout) + self.layout.addWidget(groupbox) + + return radiogroup + + def get_radio_result(self, group): + group_ids = self.radio_groups[group] + return group_ids[group.checkedId()] + def bottom_layout(self, ok_text="&Ok", cencel_text="&Cancel"): self.statusLbl = QtWidgets.QLabel() self.base_layout.addWidget(self.statusLbl) diff --git 
a/idaplugin/rematch/dialogs/match.py b/idaplugin/rematch/dialogs/match.py new file mode 100755 index 000000000..144fb6307 --- /dev/null +++ b/idaplugin/rematch/dialogs/match.py @@ -0,0 +1,74 @@ +try: + from PyQt5 import QtWidgets +except: + from PySide import QtGui + QtWidgets = QtGui + +from . import base +from .. import network, netnode + + +class MatchDialog(base.BaseDialog): + def __init__(self, **kwargs): + super(MatchDialog, self).__init__(title="Match", **kwargs) + + self.layout = QtWidgets.QVBoxLayout() + + self.sourceGrp = self.add_radio_group("Match source", + ("Entire IDB", 'idb'), + ("User functions", 'user'), + ("Single function", 'single'), + ("Range", 'range')) + + self.targetGrp = self.add_radio_group("Match target", + ("Entire DB", 'db'), + ("Project", 'project'), + ("Another file", 'file')) + + self.identity = QtWidgets.QCheckBox("Identify matches") + self.fuzzy = QtWidgets.QCheckBox("Fuzzy matches") + self.graph = QtWidgets.QCheckBox("Graph matches") + self.identity.setChecked(True) + self.fuzzy.setChecked(True) + self.graph.setChecked(True) + methodLyt = QtWidgets.QVBoxLayout() + methodLyt.addWidget(self.identity) + methodLyt.addWidget(self.fuzzy) + methodLyt.addWidget(self.graph) + + methodGbx = QtWidgets.QGroupBox("Match methods") + methodGbx.setLayout(methodLyt) + self.layout.addWidget(methodGbx) + + self.statusLbl = QtWidgets.QLabel() + self.layout.addWidget(self.statusLbl) + + startBtn = QtWidgets.QPushButton("&Start matching") + startBtn.setDefault(True) + cancelBtn = QtWidgets.QPushButton("&Cancel") + SizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, + QtWidgets.QSizePolicy.Fixed) + startBtn.setSizePolicy(SizePolicy) + cancelBtn.setSizePolicy(SizePolicy) + buttonLyt = QtWidgets.QHBoxLayout() + buttonLyt.addWidget(startBtn) + buttonLyt.addWidget(cancelBtn) + self.layout.addLayout(buttonLyt) + + self.setLayout(self.layout) + + startBtn.clicked.connect(self.accept) + cancelBtn.clicked.connect(self.reject) + + def data(self): + 
source = self.get_radio_result(self.sourceGrp) + target = self.get_radio_result(self.targetGrp) + methods = [] + if self.identity.isChecked(): + methods.append('identity') + if self.fuzzy.isChecked(): + methods.append('fuzzy') + if self.graph.isChecked(): + methods.append('graph') + + return source, target, methods diff --git a/idaplugin/rematch/images/match_all.png b/idaplugin/rematch/images/match.png similarity index 100% rename from idaplugin/rematch/images/match_all.png rename to idaplugin/rematch/images/match.png diff --git a/idaplugin/rematch/images/match_function.png b/idaplugin/rematch/images/match_function.png deleted file mode 100755 index f369cc21d984b79eb44224b9dfe1508f0bd07d2c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1224 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE3?yBabR7dyEa{HEjtmSN`?>!lvVtU&J%W50 z7^>757#dm_7=8hT8eT9klo~KFyh>nTu$sZZAYL$MSD+081LKkapAgso|Nk@OePqb} zz>xc%A@2hSWqo8|_|K377J-X{7!Z;73~B$t><J%uQ(<&Ea(_|h z&R5TZSZ9CFm+^kIO4K;sgM0Z4OWqSKb`LjRkUidF^giaZwZdETvndZI_KAH`_Bt1R zMNdTSr$BM8@1k@V5>u0ptDZf0 z^XSpDhfg2ZZwQSF@Q82`IU^)IeS*v6$&;o{oH{wY;iKrzsZ%CC4c!rP^-Akb69J`D zv!Xmc-Ll%HQ2h1F#3MXBy#g}H!UjvTyK+{|w49Tv$g|P^!X+-x#;v?_GPBuJKioKc z^oFd=+&K$##MQ%n^mmt*mhP;sXqpk{x8{cP+1tn6&dzQ;c>TP5`VMv$HbVmz?T4L` zM+HRNE*RKwNgPs@Pa$oWG=Ma@uU`I1M06Q-S8 zFlSAlw57;|Ia%JTeha8R$SG-6-~=&*BRNNs%y3~$vE*NBpo#FA92mmtT}V`<;yxP!WTttDnm{r-UW|mX7N$ diff --git a/idaplugin/rematch/plugin.py b/idaplugin/rematch/plugin.py index 44501a145..efec0c2ab 100755 --- a/idaplugin/rematch/plugin.py +++ b/idaplugin/rematch/plugin.py @@ -52,8 +52,7 @@ def setup(self): actions.project.AddProjectAction.register() actions.project.AddFileAction.register() - actions.match.MatchAllAction.register() - actions.match.MatchFunctionAction.register() + actions.match.MatchAction.register() actions.settings.SettingsAction.register() From 25243e0630a31235870932163e6dd305ed2276e3 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Mon, 29 Aug 2016 17:13:02 +0300 Subject: 
[PATCH 10/68] fix minor codacy issues Signed-off-by: Nir Izraeli --- idaplugin/rematch/dialogs/match.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/idaplugin/rematch/dialogs/match.py b/idaplugin/rematch/dialogs/match.py index 144fb6307..781cededf 100755 --- a/idaplugin/rematch/dialogs/match.py +++ b/idaplugin/rematch/dialogs/match.py @@ -1,11 +1,10 @@ try: from PyQt5 import QtWidgets -except: +except ImportError: from PySide import QtGui QtWidgets = QtGui from . import base -from .. import network, netnode class MatchDialog(base.BaseDialog): From 31227216734eacdca5ffdc26bb65b614f00f8ef6 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Mon, 29 Aug 2016 17:16:11 +0300 Subject: [PATCH 11/68] fix flake8 issues Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 6 +----- idaplugin/rematch/dialogs/base.py | 6 +++--- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 413092a32..1d1268b58 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -1,7 +1,3 @@ -import idaapi -import idc -import idautils - from ..idasix import QtCore, QtWidgets from ..dialogs.match import MatchDialog @@ -72,4 +68,4 @@ def accepted_upload(self): task_params = {'action': 'commit', 'file': netnode.bound_file_id, 'project': None} r = network.query("POST", "collab/tasks/", params=task_params, json=True) - print(r) \ No newline at end of file + print(r) diff --git a/idaplugin/rematch/dialogs/base.py b/idaplugin/rematch/dialogs/base.py index 57d3f2306..3c55c9af6 100755 --- a/idaplugin/rematch/dialogs/base.py +++ b/idaplugin/rematch/dialogs/base.py @@ -28,17 +28,17 @@ def add_radio_group(self, title, *radios, **kwargs): self.radio_groups[radiogroup] = [] for i, radio in enumerate(radios): - radio_name, id = radio + radio_name, radio_id = radio radio_widget = QtWidgets.QRadioButton(radio_name) # if checked is supplied, set correct radio as 
checked # else set first radio as checked` - if (checked is None and i == 0) or checked == id: + if (checked is None and i == 0) or checked == radio_id: radio_widget.setChecked(True) radiogroup.addButton(radio_widget, i) layout.addWidget(radio_widget) - self.radio_groups[radiogroup].append(id) + self.radio_groups[radiogroup].append(radio_id) groupbox.setLayout(layout) self.layout.addWidget(groupbox) From 8d863f8c40053583b4a610f2602ed67d0153a7ea Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Tue, 30 Aug 2016 00:20:50 +0300 Subject: [PATCH 12/68] Implement simple match sources Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 41 +++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 1d1268b58..a1f342ad6 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -1,4 +1,7 @@ from ..idasix import QtCore, QtWidgets +import idautils +import idaapi +import idc from ..dialogs.match import MatchDialog @@ -19,9 +22,45 @@ def __init__(self, *args, **kwargs): self.target = None self.methods = [] + def get_functions(self): + if self.source == 'idb': + return idautils.Functions() + elif self.source == 'user': + raise NotImplementedError("All user functions are not currently " + "supported as source value.") + elif self.source == 'single': + return idaapi.choose_func("Choose function to match with database", + idc.ScreenEA()) + elif self.source == 'range': + raise NotImplementedError("Range of addresses is not currently " + "supported as source value.") + + raise ValueError("Invalid source value received from MatchDialog: {}" + "".format(self.source)) + + def get_functions_count(self): + if self.source == 'idb': + return len(set(idautils.Functions())) + elif self.source == 'user': + raise NotImplementedError("All user functions are not currently " + "supported as source value.") + elif self.soruce == 'single': + 
return 1 + elif self.source == 'range': + raise NotImplementedError("Range of addresses is not currently " + "supported as source value.") + + raise ValueError("Invalid source value received from MatchDialog: {}" + "".format(self.source)) + def activate(self, ctx): dialog = MatchDialog() - self.source, self.target, self.methods = dialog.get() + data, _, result = dialog.get() + + if result is None: + return + + self.source, self.target, self.methods = data print(self.source, self.target, self.methods) function_gen = self.get_functions() From a290903c51dbcaa7baac69851266586a701c544e Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Tue, 30 Aug 2016 01:49:44 +0300 Subject: [PATCH 13/68] Fix single source function match Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index a1f342ad6..dffa6c455 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -29,8 +29,11 @@ def get_functions(self): raise NotImplementedError("All user functions are not currently " "supported as source value.") elif self.source == 'single': - return idaapi.choose_func("Choose function to match with database", + func = idaapi.choose_func("Choose function to match with database", idc.ScreenEA()) + if not func: + return None + return [func.startEA] elif self.source == 'range': raise NotImplementedError("Range of addresses is not currently " "supported as source value.") @@ -44,7 +47,7 @@ def get_functions_count(self): elif self.source == 'user': raise NotImplementedError("All user functions are not currently " "supported as source value.") - elif self.soruce == 'single': + elif self.source == 'single': return 1 elif self.source == 'range': raise NotImplementedError("Range of addresses is not currently " From f2ec53ebf9a5f6453eac32c5cd324d3c28b3da6e Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 31 
Aug 2016 20:15:17 +0300 Subject: [PATCH 14/68] Add file_id to vectors and an empty loop that pulls the vectors Signed-off-by: Nir Izraeli --- server/collab/models.py | 1 + server/collab/serializers.py | 4 +++- server/collab/tasks.py | 18 ++++++++++++++---- server/collab/views.py | 6 +++++- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/server/collab/models.py b/server/collab/models.py index a65cd8d51..3206ace31 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -74,6 +74,7 @@ class Vector(models.Model): (TYPE_OPCODE_HIST, "Opcode Histogram")) instance = models.ForeignKey(Instance, related_name='vectors') + file = models.ForeignKey(File, related_name='vectors') type = models.CharField(max_length=16, choices=TYPE_CHOICES) type_version = models.IntegerField() data = models.TextField() diff --git a/server/collab/serializers.py b/server/collab/serializers.py index 2ef450477..0560ea92d 100644 --- a/server/collab/serializers.py +++ b/server/collab/serializers.py @@ -61,6 +61,8 @@ def create(self, validated_data): class VectorSerializer(serializers.ModelSerializer): + file = serializers.ReadOnlyField() + class Meta: model = Vector - fields = ('id', 'instance', 'type', 'type_version', 'data') + fields = ('id', 'file', 'instance', 'type', 'type_version', 'data') diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 67daa6716..b082178dc 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -1,17 +1,27 @@ from django.utils.timezone import now -from models import Task +from models import Task, Vector from celery import shared_task @shared_task def match(file_id, project_id): - task = Task.objects.filter(task_id=match.request.id) - # recording the task has started + task = Task.objects.filter(task_id=match.request.id) task.update(status=Task.STATUS_STARTED) print("Running task {}".format(match.request.id)) + # TODO: order should be important here + vector_types = [t[0] for t in Vector.TYPE_CHOICES] + for vector_type 
in vector_types: + print(vector_type) + vectors = Vector.objects.filter(type=vector_type) + source_vectors = vectors.filter(file_id=file_id) + target_vectors = vectors.filter(file_id__project_id=project_id, + file_id__not=file_id) + print(source_vectors) + print(target_vectors) + print(source_vectors.all()) + print(target_vectors.all()) - # TODO: finished=now task.update(status=Task.STATUS_DONE, finished=now()) diff --git a/server/collab/views.py b/server/collab/views.py index 56ed20cb4..6e06958d6 100644 --- a/server/collab/views.py +++ b/server/collab/views.py @@ -31,10 +31,14 @@ class ProjectViewSet(ViewSetOwnerMixin, viewsets.ModelViewSet): serializer_class = ProjectSerializer -class FileViewSet(ViewSetOwnerMixin, viewsets.ModelViewSet): +class FileViewSet(viewsets.ModelViewSet): queryset = File.objects.all() serializer_class = FileSerializer + def perform_create(self, serializer): + serializer.save(owner=self.request.user, + file=serializer.validated_data['instance'].file) + class TaskViewSet(mixins.CreateModelMixin, mixins.RetrieveModelMixin, mixins.DestroyModelMixin, mixins.ListModelMixin, From 25ddae66f224ebcf0e6c96757fa8e642a672d7ca Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 31 Aug 2016 20:51:31 +0300 Subject: [PATCH 15/68] Add progress fields to task Signed-off-by: Nir Izraeli --- server/collab/models.py | 3 +++ server/collab/tasks.py | 11 ++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/server/collab/models.py b/server/collab/models.py index 3206ace31..c2d24498a 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -119,6 +119,9 @@ class Task(models.Model): project = models.ForeignKey(Project, related_name='tasks') file = models.ForeignKey(File, related_name='tasks') + progress = models.PositiveSmallIntegerField(default=0) + progress_max = models.PositiveSmallIntegerField(default=0) + class Match(Instance.matches.through()): task = models.ForeignKey(Task, db_index=True, related_name='matches') diff 
--git a/server/collab/tasks.py b/server/collab/tasks.py index b082178dc..8aa6d9db8 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -1,4 +1,5 @@ from django.utils.timezone import now +from djangp.db.models import F from models import Task, Vector from celery import shared_task @@ -6,13 +7,15 @@ @shared_task def match(file_id, project_id): + # doing some preperations + vector_types = [t[0] for t in Vector.TYPE_CHOICES] + # recording the task has started task = Task.objects.filter(task_id=match.request.id) - task.update(status=Task.STATUS_STARTED) + task.update(status=Task.STATUS_STARTED, progress_max=len(vector_types)) print("Running task {}".format(match.request.id)) - # TODO: order should be important here - vector_types = [t[0] for t in Vector.TYPE_CHOICES] + # TODO: order might be important here for vector_type in vector_types: print(vector_type) vectors = Vector.objects.filter(type=vector_type) @@ -24,4 +27,6 @@ def match(file_id, project_id): print(source_vectors.all()) print(target_vectors.all()) + task.update(progress=F('progress')+1) + task.update(status=Task.STATUS_DONE, finished=now()) From 1401ac12060f2904fd1e95a6b6477ad65e372d1d Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 31 Aug 2016 21:56:40 +0300 Subject: [PATCH 16/68] fix spaces around '+' in tasks.py Signed-off-by: Nir Izraeli --- server/collab/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 8aa6d9db8..cf8be233f 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -27,6 +27,6 @@ def match(file_id, project_id): print(source_vectors.all()) print(target_vectors.all()) - task.update(progress=F('progress')+1) + task.update(progress=F('progress') + 1) task.update(status=Task.STATUS_DONE, finished=now()) From e202739cceb308ac5d9c2c77dbf989596c6d6513 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 31 Aug 2016 23:40:02 +0300 Subject: [PATCH 17/68] Fix vector file field population 
Signed-off-by: Nir Izraeli --- server/collab/serializers.py | 2 +- server/collab/tasks.py | 2 +- server/collab/views.py | 9 ++++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/server/collab/serializers.py b/server/collab/serializers.py index 0560ea92d..5380b7ffa 100644 --- a/server/collab/serializers.py +++ b/server/collab/serializers.py @@ -54,7 +54,7 @@ class Meta: def create(self, validated_data): vectors_data = validated_data.pop('vectors') obj = self.Meta.model.objects.create(**validated_data) - vectors = (Vector(instance=obj, **vector_data) + vectors = (Vector(instance=obj, file=validated_data['file'], **vector_data) for vector_data in vectors_data) Vector.objects.bulk_create(vectors) return obj diff --git a/server/collab/tasks.py b/server/collab/tasks.py index cf8be233f..736b380b9 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -1,5 +1,5 @@ from django.utils.timezone import now -from djangp.db.models import F +from django.db.models import F from models import Task, Vector from celery import shared_task diff --git a/server/collab/views.py b/server/collab/views.py index 6e06958d6..5777a29f0 100644 --- a/server/collab/views.py +++ b/server/collab/views.py @@ -31,14 +31,10 @@ class ProjectViewSet(ViewSetOwnerMixin, viewsets.ModelViewSet): serializer_class = ProjectSerializer -class FileViewSet(viewsets.ModelViewSet): +class FileViewSet(ViewSetOwnerMixin, viewsets.ModelViewSet): queryset = File.objects.all() serializer_class = FileSerializer - def perform_create(self, serializer): - serializer.save(owner=self.request.user, - file=serializer.validated_data['instance'].file) - class TaskViewSet(mixins.CreateModelMixin, mixins.RetrieveModelMixin, mixins.DestroyModelMixin, mixins.ListModelMixin, @@ -72,3 +68,6 @@ class VectorViewSet(ViewSetManyAllowedMixin, viewsets.ModelViewSet): queryset = Vector.objects.all() serializer_class = VectorSerializer permission_classes = (permissions.IsAuthenticatedOrReadOnly,) + + def 
perform_create(self, serializer): + serializer.save(file=serializer.validated_data['instance'].file) From 1d95efbb5d14b11f79fbc6a7c1e61f5695b92ba7 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 1 Sep 2016 16:53:13 +0300 Subject: [PATCH 18/68] Add match task execution's progress bar --- idaplugin/rematch/actions/match.py | 36 +++++++++++++++++++----------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index dffa6c455..35586e245 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -21,6 +21,8 @@ def __init__(self, *args, **kwargs): self.source = None self.target = None self.methods = [] + self.task_id = None + self.pbar = QtWidgets.QProgressDialog() def get_functions(self): if self.source == 'idb': @@ -64,25 +66,22 @@ def activate(self, ctx): return self.source, self.target, self.methods = data - print(self.source, self.target, self.methods) function_gen = self.get_functions() if not function_gen: return self.function_gen = enumerate(function_gen) - pd = QtWidgets.QProgressDialog(labelText="Processing...\nYou may continue " - "working but avoid ground-" - "breaking changes.", - maximum=self.get_functions_count()) - self.pbar = pd + self.pbar.setLabel("Processing IDB... 
You may continue working,\nbut " + "plese avoid making any ground-breaking changes.") + self.pbar.setRange(0, self.get_functions_count()) + self.pbar.setValue(0) self.pbar.canceled.connect(self.cancel) + self.pbar.accepted.connect(self.accepted_upload) self.timer.timeout.connect(self.perform_upload) self.timer.start() - self.pbar.accepted.connect(self.accepted_upload) - def perform_upload(self): try: i, offset = self.function_gen.next() @@ -106,8 +105,19 @@ def accepted_upload(self): self.timer.stop() self.timer.disconnect() - # TODO: ask for project to compare against - task_params = {'action': 'commit', 'file': netnode.bound_file_id, - 'project': None} - r = network.query("POST", "collab/tasks/", params=task_params, json=True) - print(r) + params = {'action': 'commit', 'file': netnode.bound_file_id, + 'project': None} + r = network.query("POST", "collab/tasks/", params=params, json=True) + self.task_id = r['id'] + + self.timer.timeout.connect(self.task_progress) + self.timer.start(1000) + + def task_progress(self): + r = network.query("GET", "collab/tasks", params={'id': self.task_id}, + json=True) + + self.pbar.setLabel("Waiting for remote matching... 
You may continue " + "working without any limitations.") + self.pbar.setRange(0, int(r['progress_maximum'])) + self.pbar.setValue(int(r['progress'])) From 35f2a4a952717cb0ed80b142aeec1360cb3b9981 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 2 Sep 2016 20:12:33 +0300 Subject: [PATCH 19/68] Fix target_vectors filter Signed-off-by: Nir Izraeli --- server/collab/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 736b380b9..6b2ac71f0 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -20,8 +20,8 @@ def match(file_id, project_id): print(vector_type) vectors = Vector.objects.filter(type=vector_type) source_vectors = vectors.filter(file_id=file_id) - target_vectors = vectors.filter(file_id__project_id=project_id, - file_id__not=file_id) + target_vectors = vectors.filter(file_id__project_id=project_id) + target_vectors = target_vectors.exclude(file_id=file_id) print(source_vectors) print(target_vectors) print(source_vectors.all()) From 19cffe4e0716c72f2619300240f130bfa49047c4 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 2 Sep 2016 20:16:21 +0300 Subject: [PATCH 20/68] Add task execution progress report and progress bar Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 53 ++++++++++++++++++++---------- server/collab/serializers.py | 4 ++- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 35586e245..c6edc08df 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -22,7 +22,7 @@ def __init__(self, *args, **kwargs): self.target = None self.methods = [] self.task_id = None - self.pbar = QtWidgets.QProgressDialog() + self.pbar = None def get_functions(self): if self.source == 'idb': @@ -72,15 +72,16 @@ def activate(self, ctx): return self.function_gen = enumerate(function_gen) - self.pbar.setLabel("Processing IDB... 
You may continue working,\nbut " - "plese avoid making any ground-breaking changes.") + self.pbar = QtWidgets.QProgressDialog() + self.pbar.setLabelText("Processing IDB... You may continue working,\nbut " + "please avoid making any ground-breaking changes.") self.pbar.setRange(0, self.get_functions_count()) self.pbar.setValue(0) - self.pbar.canceled.connect(self.cancel) + self.pbar.canceled.connect(self.cancel_upload) self.pbar.accepted.connect(self.accepted_upload) self.timer.timeout.connect(self.perform_upload) - self.timer.start() + self.timer.start(0) def perform_upload(self): try: @@ -95,29 +96,47 @@ def perform_upload(self): if i >= self.pbar.maximum(): self.pbar.accept() except: - self.timer.stop() + self.cancel_upload() raise - def cancel(self): + def cancel_upload(self): self.timer.stop() + self.timer.disconnect() + self.pbar = None def accepted_upload(self): - self.timer.stop() - self.timer.disconnect() + self.cancel_upload() params = {'action': 'commit', 'file': netnode.bound_file_id, 'project': None} r = network.query("POST", "collab/tasks/", params=params, json=True) self.task_id = r['id'] - self.timer.timeout.connect(self.task_progress) + self.pbar = QtWidgets.QProgressDialog() + self.pbar.setLabelText("Waiting for remote matching... You may continue " + "working without any limitations.") + self.pbar.setRange(0, int(r['progress_max'])) + self.pbar.setValue(int(r['progress'])) + self.pbar.canceled.connect(self.cancel_task) + self.pbar.accepted.connect(self.accepted_task) + + self.timer.timeout.connect(self.perform_task) self.timer.start(1000) - def task_progress(self): - r = network.query("GET", "collab/tasks", params={'id': self.task_id}, - json=True) + def perform_task(self): + try: + r = network.query("GET", "collab/tasks/{}/".format(self.task_id), + json=True) - self.pbar.setLabel("Waiting for remote matching... 
You may continue " - "working without any limitations.") - self.pbar.setRange(0, int(r['progress_maximum'])) - self.pbar.setValue(int(r['progress'])) + self.pbar.setRange(0, int(r['progress_max'])) + self.pbar.setValue(int(r['progress'])) + except: + self.cancel_task() + + def cancel_task(self): + self.timer.stop() + self.timer.disconnect() + self.pbar = None + + def accepted_task(self): + self.cancel_task() diff --git a/server/collab/serializers.py b/server/collab/serializers.py index 5380b7ffa..f2c727dcd 100644 --- a/server/collab/serializers.py +++ b/server/collab/serializers.py @@ -28,6 +28,8 @@ class TaskSerializer(serializers.ModelSerializer): created = serializers.ReadOnlyField() finished = serializers.ReadOnlyField() status = serializers.ReadOnlyField() + progress = serializers.ReadOnlyField() + progress_max = serializers.ReadOnlyField() project = serializers.PrimaryKeyRelatedField(queryset=Project.objects.all(), allow_null=True) @@ -35,7 +37,7 @@ class TaskSerializer(serializers.ModelSerializer): class Meta: model = Task fields = ('id', 'task_id', 'created', 'finished', 'owner', 'status', - 'action', 'project', 'file') + 'action', 'project', 'file', 'progress', 'progress_max') class InstanceSerializer(serializers.ModelSerializer): From 69b48b651ad3df15b9be6950d9e04e5d79e170f1 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 2 Sep 2016 20:16:53 +0300 Subject: [PATCH 21/68] use BaseDialog's bottom layout Signed-off-by: Nir Izraeli --- idaplugin/rematch/dialogs/match.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/idaplugin/rematch/dialogs/match.py b/idaplugin/rematch/dialogs/match.py index 781cededf..82baff9d3 100755 --- a/idaplugin/rematch/dialogs/match.py +++ b/idaplugin/rematch/dialogs/match.py @@ -40,24 +40,8 @@ def __init__(self, **kwargs): self.layout.addWidget(methodGbx) self.statusLbl = QtWidgets.QLabel() - self.layout.addWidget(self.statusLbl) - startBtn = QtWidgets.QPushButton("&Start matching") - 
startBtn.setDefault(True) - cancelBtn = QtWidgets.QPushButton("&Cancel") - SizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, - QtWidgets.QSizePolicy.Fixed) - startBtn.setSizePolicy(SizePolicy) - cancelBtn.setSizePolicy(SizePolicy) - buttonLyt = QtWidgets.QHBoxLayout() - buttonLyt.addWidget(startBtn) - buttonLyt.addWidget(cancelBtn) - self.layout.addLayout(buttonLyt) - - self.setLayout(self.layout) - - startBtn.clicked.connect(self.accept) - cancelBtn.clicked.connect(self.reject) + self.bottom_layout(self.accept, "&Start matching") def data(self): source = self.get_radio_result(self.sourceGrp) From d7d17afd4f83645cb8475dde1ff2d2f8898df6f4 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 2 Sep 2016 21:02:18 +0300 Subject: [PATCH 22/68] explicitly catch all exceptions and raise on MatchAction.perform_task Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index c6edc08df..10fde101d 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -95,7 +95,7 @@ def perform_upload(self): self.pbar.setValue(i) if i >= self.pbar.maximum(): self.pbar.accept() - except: + except Exception: self.cancel_upload() raise @@ -130,8 +130,9 @@ def perform_task(self): self.pbar.setRange(0, int(r['progress_max'])) self.pbar.setValue(int(r['progress'])) - except: + except Exception: self.cancel_task() + raise def cancel_task(self): self.timer.stop() From d7b41002c05dd74f2ca9373bdedc4a8440500923 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sat, 3 Sep 2016 02:49:41 +0300 Subject: [PATCH 23/68] Add task to admin panel Signed-off-by: Nir Izraeli --- server/collab/admin.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/collab/admin.py b/server/collab/admin.py index ebc9a2413..59247225e 100644 --- a/server/collab/admin.py +++ b/server/collab/admin.py @@ 
-1,11 +1,12 @@ -from collab.models import (Project, File, Instance, Vector, NameAnnotation, - CommentAnnotation, RptCommentAnnotation, - AboveLineCommentAnnotation, +from collab.models import (Project, File, Task, Instance, Vector, + NameAnnotation, CommentAnnotation, + RptCommentAnnotation, AboveLineCommentAnnotation, BelowLineCommentAnnotation) from django.contrib import admin admin.site.register(Project) admin.site.register(File) +admin.site.register(Task) admin.site.register(Instance) admin.site.register(Vector) From eac0bea0899d05fd1192c99693ad08bd7c6bf128 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Mon, 5 Sep 2016 06:01:46 +0300 Subject: [PATCH 24/68] fix two base_layout issues Signed-off-by: Nir Izraeli --- idaplugin/rematch/dialogs/base.py | 2 +- idaplugin/rematch/dialogs/match.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/idaplugin/rematch/dialogs/base.py b/idaplugin/rematch/dialogs/base.py index 3c55c9af6..aec6f7414 100755 --- a/idaplugin/rematch/dialogs/base.py +++ b/idaplugin/rematch/dialogs/base.py @@ -40,7 +40,7 @@ def add_radio_group(self, title, *radios, **kwargs): layout.addWidget(radio_widget) self.radio_groups[radiogroup].append(radio_id) groupbox.setLayout(layout) - self.layout.addWidget(groupbox) + self.base_layout.addWidget(groupbox) return radiogroup diff --git a/idaplugin/rematch/dialogs/match.py b/idaplugin/rematch/dialogs/match.py index 82baff9d3..c7df5df11 100755 --- a/idaplugin/rematch/dialogs/match.py +++ b/idaplugin/rematch/dialogs/match.py @@ -11,8 +11,6 @@ class MatchDialog(base.BaseDialog): def __init__(self, **kwargs): super(MatchDialog, self).__init__(title="Match", **kwargs) - self.layout = QtWidgets.QVBoxLayout() - self.sourceGrp = self.add_radio_group("Match source", ("Entire IDB", 'idb'), ("User functions", 'user'), @@ -37,9 +35,7 @@ def __init__(self, **kwargs): methodGbx = QtWidgets.QGroupBox("Match methods") methodGbx.setLayout(methodLyt) - self.layout.addWidget(methodGbx) - - 
self.statusLbl = QtWidgets.QLabel() + self.base_layout.addWidget(methodGbx) self.bottom_layout(self.accept, "&Start matching") From a7f6de75451b84468d442eb82d640cf0f8e1f811 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Mon, 5 Sep 2016 06:16:19 +0300 Subject: [PATCH 25/68] accept null project when creating task Signed-off-by: Nir Izraeli --- server/collab/models.py | 2 +- server/collab/views.py | 16 +++++----------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/server/collab/models.py b/server/collab/models.py index c2d24498a..eb4292fd8 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -116,7 +116,7 @@ class Task(models.Model): choices=STATUS_CHOICES) action = models.CharField(max_length=16, choices=ACTION_CHOICES) - project = models.ForeignKey(Project, related_name='tasks') + project = models.ForeignKey(Project, null=True, related_name='tasks') file = models.ForeignKey(File, related_name='tasks') progress = models.PositiveSmallIntegerField(default=0) diff --git a/server/collab/views.py b/server/collab/views.py index 5777a29f0..ca6b8f7d7 100644 --- a/server/collab/views.py +++ b/server/collab/views.py @@ -45,17 +45,11 @@ class TaskViewSet(mixins.CreateModelMixin, mixins.RetrieveModelMixin, IsOwnerOrReadOnly) def perform_create(self, serializer): - if not serializer.validated_data['project']: - project = serializer.validated_data['file'].project - serializer.validated_data['project'] = project - - # if no project, let serializer.save fail on none project - if serializer.validated_data['project']: - result = tasks.match.delay(serializer.validated_data['file'].id, - serializer.validated_data['project'].id) - serializer.save(owner=self.request.user, task_id=result.id) - else: - serializer.save(owner=self.request.user, task_id='') + file_id = serializer.validated_data['file'].id + project = serializer.validated_data['project'] + project_id = project.id if project else None + result = tasks.match.delay(file_id, project_id) + 
serializer.save(owner=self.request.user, task_id=result.id) class InstanceViewSet(ViewSetManyAllowedMixin, ViewSetOwnerMixin, From f73894d194441d421fdac6d7f0bb8883ce60c0c5 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Tue, 6 Sep 2016 02:13:20 +0300 Subject: [PATCH 26/68] Accept progress bar when task is complete Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 10fde101d..f710ea0ae 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -128,8 +128,13 @@ def perform_task(self): r = network.query("GET", "collab/tasks/{}/".format(self.task_id), json=True) - self.pbar.setRange(0, int(r['progress_max'])) - self.pbar.setValue(int(r['progress'])) + progress_max = int(r['progress_max']) + progress = int(r['progress']) + self.pbar.setRange(0, progress_max) + self.pbar.setValue(progress) + + if progress >= progress_max: + self.pbar.accept() except Exception: self.cancel_task() raise From b02ddd8ad73031f96c1aa104269bb7e85a35aade Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 7 Sep 2016 00:13:44 +0300 Subject: [PATCH 27/68] Bulk delayed instance requests to speed up the collection phase Which now takes only a few seconds on a 2k functions idb Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index f710ea0ae..f1f5e1aec 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -23,6 +23,7 @@ def __init__(self, *args, **kwargs): self.methods = [] self.task_id = None self.pbar = None + self.instance_set = [] def get_functions(self): if self.source == 'idb': @@ -86,19 +87,32 @@ def activate(self, ctx): def perform_upload(self): try: i, offset = 
self.function_gen.next() + except StopIteration: + self.timer.stop() + self.timer.disconnect() + return + try: func = instances.FunctionInstance(netnode.bound_file_id, offset) - network.query("POST", "collab/instances/", params=func.serialize(), - json=True) - - i = i + 1 - self.pbar.setValue(i) - if i >= self.pbar.maximum(): - self.pbar.accept() + self.instance_set.append(func.serialize()) + + if len(self.instance_set) >= 100: + network.delayed_query("POST", "collab/instances/", + params=self.instance_set, json=True, + callback=self.progress_advance) + self.instance_set = [] + self.pbar.setMaximum(self.pbar.maximum() + 1) + self.progress_advance() except Exception: self.cancel_upload() raise + def progress_advance(self, result=None): + new_value = self.pbar.value() + 1 + self.pbar.setValue(new_value) + if new_value >= self.pbar.maximum(): + self.pbar.accept() + def cancel_upload(self): self.timer.stop() self.timer.disconnect() From f2244cd56c03189729b53353d7ef9e3d85572beb Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 7 Sep 2016 03:18:33 +0300 Subject: [PATCH 28/68] simplify get_functions_count Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index f1f5e1aec..a76e24b96 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -45,19 +45,7 @@ def get_functions(self): "".format(self.source)) def get_functions_count(self): - if self.source == 'idb': - return len(set(idautils.Functions())) - elif self.source == 'user': - raise NotImplementedError("All user functions are not currently " - "supported as source value.") - elif self.source == 'single': - return 1 - elif self.source == 'range': - raise NotImplementedError("Range of addresses is not currently " - "supported as source value.") - - raise ValueError("Invalid source value received from MatchDialog: {}" - 
"".format(self.source)) + return len(list(self.get_functions())) def activate(self, ctx): dialog = MatchDialog() From 94a7d7529e5ded1aef16382ebc67b019d4bbb482 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 7 Sep 2016 23:56:07 +0300 Subject: [PATCH 29/68] Create serverside vector classes to reduce boilerplate Signed-off-by: Nir Izraeli --- server/collab/models.py | 14 ++----------- server/collab/tasks.py | 10 ++++++---- server/collab/vectors.py | 43 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 16 deletions(-) create mode 100644 server/collab/vectors.py diff --git a/server/collab/models.py b/server/collab/models.py index eb4292fd8..cad769a55 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -3,6 +3,7 @@ from django.contrib.auth.models import User from django.core.validators import MinLengthValidator from collab.validators import IdbValidator +from collab import vectors class Project(models.Model): @@ -60,18 +61,7 @@ def __unicode__(self): class Vector(models.Model): - DUMMY = 'dummy' - TYPE_HASH = 'hash' - TYPE_ASSEMBLY_HASH = 'assembly_hash' - TYPE_MNEMONIC_HASH = 'mnemonic_hash' - TYPE_MNEMONIC_HIST = 'mnemonic_hist' - TYPE_OPCODE_HIST = 'opcode_histogram' - TYPE_CHOICES = ((DUMMY, "Dummy"), - (TYPE_HASH, "Hash"), - (TYPE_ASSEMBLY_HASH, "Assembly Hash"), - (TYPE_MNEMONIC_HASH, "Mnemonic Hash"), - (TYPE_MNEMONIC_HIST, "Mnemonic Hist"), - (TYPE_OPCODE_HIST, "Opcode Histogram")) + TYPE_CHOICES = [(vector.id, vector.name) for vector in vectors.vector_list] instance = models.ForeignKey(Instance, related_name='vectors') file = models.ForeignKey(File, related_name='vectors') diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 6b2ac71f0..173fce857 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -1,6 +1,7 @@ from django.utils.timezone import now from django.db.models import F from models import Task, Vector +import vectors from celery import shared_task @@ -16,16 +17,17 @@ def 
match(file_id, project_id): print("Running task {}".format(match.request.id)) # TODO: order might be important here - for vector_type in vector_types: + for vector_type in vectors.vector_list: print(vector_type) - vectors = Vector.objects.filter(type=vector_type) - source_vectors = vectors.filter(file_id=file_id) - target_vectors = vectors.filter(file_id__project_id=project_id) + vectors_filter = Vector.objects.filter(type=vector_type.id) + source_vectors = vectors_filter.filter(file_id=file_id) + target_vectors = vectors_filter.filter(file_id__project_id=project_id) target_vectors = target_vectors.exclude(file_id=file_id) print(source_vectors) print(target_vectors) print(source_vectors.all()) print(target_vectors.all()) + vector_type.match(source_vectors, target_vectors) task.update(progress=F('progress') + 1) diff --git a/server/collab/vectors.py b/server/collab/vectors.py new file mode 100644 index 000000000..bdf931a7b --- /dev/null +++ b/server/collab/vectors.py @@ -0,0 +1,43 @@ +class Vector: + @classmethod + def match(cls, soure, target): + raise NotImplementedError("Method match for vector type {} not " + "implemented".format(cls)) + + +class DummyVector(Vector): + id = 'dummy' + name = 'Dummy' + + @classmethod + def match(cls, soure, target): + return [] + + +class HashVector(Vector): + id = 'hash' + name = 'Hash' + + +class AssemblyHashVector(Vector): + id = 'assembly_hash' + name = 'Assembly Hash' + + +class MnemonicHashVector(Vector): + id = 'mnemonic_hash' + name = 'Mnemonic Hash' + + +class MnemonicHistogramVector(Vector): + id = 'mnemonic_hist' + name = 'Mnemonic Histogram' + + +class OpcodeHistogramVector(Vector): + id = 'opcode_histogram' + name = 'Opcode Histogram' + +vector_list = [DummyVector, HashVector, AssemblyHashVector, + MnemonicHashVector, MnemonicHistogramVector, + OpcodeHistogramVector] From b05bf871d5fb2bce3659d734b6ea7787eb648121 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 8 Sep 2016 00:00:22 +0300 Subject: [PATCH 30/68] 
safer timer use in match action Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index a76e24b96..6f213f553 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -17,7 +17,7 @@ def __init__(self, *args, **kwargs): super(MatchAction, self).__init__(*args, **kwargs) self.function_gen = None self.pbar = None - self.timer = QtCore.QTimer() + self.timer = None self.source = None self.target = None self.methods = [] @@ -69,6 +69,7 @@ def activate(self, ctx): self.pbar.canceled.connect(self.cancel_upload) self.pbar.accepted.connect(self.accepted_upload) + self.timer = QtCore.QTimer() self.timer.timeout.connect(self.perform_upload) self.timer.start(0) @@ -77,7 +78,6 @@ def perform_upload(self): i, offset = self.function_gen.next() except StopIteration: self.timer.stop() - self.timer.disconnect() return try: @@ -103,7 +103,7 @@ def progress_advance(self, result=None): def cancel_upload(self): self.timer.stop() - self.timer.disconnect() + self.timer = None self.pbar = None def accepted_upload(self): @@ -122,6 +122,7 @@ def accepted_upload(self): self.pbar.canceled.connect(self.cancel_task) self.pbar.accepted.connect(self.accepted_task) + self.timer = QtCore.QTimer() self.timer.timeout.connect(self.perform_task) self.timer.start(1000) @@ -143,7 +144,7 @@ def perform_task(self): def cancel_task(self): self.timer.stop() - self.timer.disconnect() + self.timer = None self.pbar = None def accepted_task(self): From 4df4f209a162620209bc24ac85652cf87925b0e4 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 8 Sep 2016 00:04:59 +0300 Subject: [PATCH 31/68] fix codacy minor issues Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 9 +++++---- server/collab/views.py | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/idaplugin/rematch/actions/match.py 
b/idaplugin/rematch/actions/match.py index 6f213f553..b3058326b 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -96,10 +96,11 @@ def perform_upload(self): raise def progress_advance(self, result=None): - new_value = self.pbar.value() + 1 - self.pbar.setValue(new_value) - if new_value >= self.pbar.maximum(): - self.pbar.accept() + del result + new_value = self.pbar.value() + 1 + self.pbar.setValue(new_value) + if new_value >= self.pbar.maximum(): + self.pbar.accept() def cancel_upload(self): self.timer.stop() diff --git a/server/collab/views.py b/server/collab/views.py index ca6b8f7d7..14007c137 100644 --- a/server/collab/views.py +++ b/server/collab/views.py @@ -64,4 +64,5 @@ class VectorViewSet(ViewSetManyAllowedMixin, viewsets.ModelViewSet): permission_classes = (permissions.IsAuthenticatedOrReadOnly,) def perform_create(self, serializer): + del self serializer.save(file=serializer.validated_data['instance'].file) From 4578c4549a9f31988dd3a0557e622915f80a85fa Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Tue, 13 Sep 2016 03:12:28 +0300 Subject: [PATCH 32/68] add type and score to Match Model Signed-off-by: Nir Izraeli --- server/collab/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/collab/models.py b/server/collab/models.py index cad769a55..6a1da61e3 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -116,6 +116,9 @@ class Task(models.Model): class Match(Instance.matches.through()): task = models.ForeignKey(Task, db_index=True, related_name='matches') + type = models.CharField(max_length=16, choices=Vector.TYPE_CHOICES) + score = models.FloatField() + # # Anotations From 975b184dbf024bd30e4d4dca96d8bfe3599ff29d Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 15 Sep 2016 04:19:17 +0300 Subject: [PATCH 33/68] implement match for identity functions Signed-off-by: Nir Izraeli --- server/collab/vectors.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git 
a/server/collab/vectors.py b/server/collab/vectors.py index bdf931a7b..4820a8ba4 100644 --- a/server/collab/vectors.py +++ b/server/collab/vectors.py @@ -1,3 +1,6 @@ +from collections import defaultdict + + class Vector: @classmethod def match(cls, soure, target): @@ -18,6 +21,20 @@ class HashVector(Vector): id = 'hash' name = 'Hash' + @staticmethod + def match(source, target): + # unique_values = set(source_dict.values()) + flipped_rest = defaultdict(list) + # TODO: could be optimized by enumerating all identity matchs together + for target_id, target_data in target.values_list('id', 'data').iterator(): + # TODO: could be optimized by uncommenting next line as most 'target' + # values won't be present in 'source' list + # if v in unique_values: + flipped_rest[target_data].append(target_id) + for source_id, source_data in source.values_list('id', 'data').iterator(): + for target_id in flipped_rest.get(source_data, ()): + yield (source_id, target_id) + class AssemblyHashVector(Vector): id = 'assembly_hash' From 4ad58b4f6ffb6c7e04b7a411ff83b6737cb019fa Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 15 Sep 2016 18:01:18 +0300 Subject: [PATCH 34/68] make Vector class inherit and remove VectorHash from list of avaiable hashes Signed-off-by: Nir Izraeli --- server/collab/vectors.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/server/collab/vectors.py b/server/collab/vectors.py index 4820a8ba4..3edff726c 100644 --- a/server/collab/vectors.py +++ b/server/collab/vectors.py @@ -18,9 +18,6 @@ def match(cls, soure, target): class HashVector(Vector): - id = 'hash' - name = 'Hash' - @staticmethod def match(source, target): # unique_values = set(source_dict.values()) @@ -36,12 +33,12 @@ def match(source, target): yield (source_id, target_id) -class AssemblyHashVector(Vector): +class AssemblyHashVector(HashVector): id = 'assembly_hash' name = 'Assembly Hash' -class MnemonicHashVector(Vector): +class MnemonicHashVector(HashVector): id = 
'mnemonic_hash' name = 'Mnemonic Hash' @@ -55,6 +52,5 @@ class OpcodeHistogramVector(Vector): id = 'opcode_histogram' name = 'Opcode Histogram' -vector_list = [DummyVector, HashVector, AssemblyHashVector, - MnemonicHashVector, MnemonicHistogramVector, - OpcodeHistogramVector] +vector_list = [DummyVector, AssemblyHashVector, MnemonicHashVector, + MnemonicHistogramVector,OpcodeHistogramVector] From b8958ee9e72ee9996485211c7c9a191588b8a692 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 16 Sep 2016 13:08:52 +0300 Subject: [PATCH 35/68] Add space Signed-off-by: Nir Izraeli --- server/collab/vectors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/collab/vectors.py b/server/collab/vectors.py index 3edff726c..68e3656fb 100644 --- a/server/collab/vectors.py +++ b/server/collab/vectors.py @@ -53,4 +53,4 @@ class OpcodeHistogramVector(Vector): name = 'Opcode Histogram' vector_list = [DummyVector, AssemblyHashVector, MnemonicHashVector, - MnemonicHistogramVector,OpcodeHistogramVector] + MnemonicHistogramVector, OpcodeHistogramVector] From d3417297ed6d8f2795d6fa7c48442e07c196b54c Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 16 Sep 2016 13:22:57 +0300 Subject: [PATCH 36/68] HistogramVector skeleton Signed-off-by: Nir Izraeli --- server/collab/vectors.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/server/collab/vectors.py b/server/collab/vectors.py index 68e3656fb..d536adde8 100644 --- a/server/collab/vectors.py +++ b/server/collab/vectors.py @@ -43,12 +43,18 @@ class MnemonicHashVector(HashVector): name = 'Mnemonic Hash' -class MnemonicHistogramVector(Vector): +class HistogramVector(Vector): + @staticmethod + def match(source, target): + pass + + +class MnemonicHistogramVector(HistogramVector): id = 'mnemonic_hist' name = 'Mnemonic Histogram' -class OpcodeHistogramVector(Vector): +class OpcodeHistogramVector(HistogramVector): id = 'opcode_histogram' name = 'Opcode Histogram' From 
042a105f63d7faefac8d4ab020b29e92edea1642 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sat, 17 Sep 2016 02:23:55 +0300 Subject: [PATCH 37/68] create Match objects when successfully matching Signed-off-by: Nir Izraeli --- server/collab/tasks.py | 7 +++++-- server/collab/vectors.py | 14 ++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 173fce857..c538f5bfb 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -21,13 +21,16 @@ def match(file_id, project_id): print(vector_type) vectors_filter = Vector.objects.filter(type=vector_type.id) source_vectors = vectors_filter.filter(file_id=file_id) - target_vectors = vectors_filter.filter(file_id__project_id=project_id) + if project_id: + target_vectors = vectors_filter.filter(file_id__project_id=project_id) target_vectors = target_vectors.exclude(file_id=file_id) print(source_vectors) print(target_vectors) print(source_vectors.all()) print(target_vectors.all()) - vector_type.match(source_vectors, target_vectors) + matches = vector_type.get_matches(source_vectors, target_vectors, + task.id) + print(list(matches)) task.update(progress=F('progress') + 1) diff --git a/server/collab/vectors.py b/server/collab/vectors.py index d536adde8..7ed914767 100644 --- a/server/collab/vectors.py +++ b/server/collab/vectors.py @@ -1,12 +1,18 @@ from collections import defaultdict +from models import Match class Vector: @classmethod - def match(cls, soure, target): + def match(cls, source, target): raise NotImplementedError("Method match for vector type {} not " "implemented".format(cls)) + @classmethod + def get_matches(cls, source, target, task_id): + for source_id, target_id, score in cls.match(source, target): + yield Match(source_id, target_id, score=score, type=cls.id) + class DummyVector(Vector): id = 'dummy' @@ -18,8 +24,8 @@ def match(cls, soure, target): class HashVector(Vector): - @staticmethod - def match(source, target): + 
@classmethod + def match(cls, source, target): # unique_values = set(source_dict.values()) flipped_rest = defaultdict(list) # TODO: could be optimized by enumerating all identity matchs together @@ -30,7 +36,7 @@ def match(source, target): flipped_rest[target_data].append(target_id) for source_id, source_data in source.values_list('id', 'data').iterator(): for target_id in flipped_rest.get(source_data, ()): - yield (source_id, target_id) + yield source_id, target_id, 100 class AssemblyHashVector(HashVector): From 8915bfe233f3b6fb0e84fbfb214195c666ad9ce3 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sat, 17 Sep 2016 19:15:44 +0300 Subject: [PATCH 38/68] Split id to match_type and vector_type, to allow several matches using the same vector type Signed-off-by: Nir Izraeli --- server/collab/tasks.py | 2 +- server/collab/vectors.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/server/collab/tasks.py b/server/collab/tasks.py index c538f5bfb..859aef9b8 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -19,7 +19,7 @@ def match(file_id, project_id): # TODO: order might be important here for vector_type in vectors.vector_list: print(vector_type) - vectors_filter = Vector.objects.filter(type=vector_type.id) + vectors_filter = Vector.objects.filter(type=vector_type.vector_type) source_vectors = vectors_filter.filter(file_id=file_id) if project_id: target_vectors = vectors_filter.filter(file_id__project_id=project_id) diff --git a/server/collab/vectors.py b/server/collab/vectors.py index 7ed914767..030381ec7 100644 --- a/server/collab/vectors.py +++ b/server/collab/vectors.py @@ -11,11 +11,11 @@ def match(cls, source, target): @classmethod def get_matches(cls, source, target, task_id): for source_id, target_id, score in cls.match(source, target): - yield Match(source_id, target_id, score=score, type=cls.id) + yield Match(source_id, target_id, score=score, type=cls.match_type) class DummyVector(Vector): - id = 'dummy' + 
match_type = 'dummy' name = 'Dummy' @classmethod @@ -40,12 +40,14 @@ def match(cls, source, target): class AssemblyHashVector(HashVector): - id = 'assembly_hash' + vector_type = 'assembly_hash' + match_type = 'assembly_hash' name = 'Assembly Hash' class MnemonicHashVector(HashVector): - id = 'mnemonic_hash' + vector_type = 'mnemonic_hash' + match_type = 'mnemonic_hash' name = 'Mnemonic Hash' @@ -56,12 +58,14 @@ def match(source, target): class MnemonicHistogramVector(HistogramVector): - id = 'mnemonic_hist' + vector_type = 'mnemonic_hist' + match_type = 'mnemonic_hist' name = 'Mnemonic Histogram' class OpcodeHistogramVector(HistogramVector): - id = 'opcode_histogram' + vector_type = 'opcode_histogram' + match_type = 'opcode_histogram' name = 'Opcode Histogram' vector_list = [DummyVector, AssemblyHashVector, MnemonicHashVector, From 3b4a9d809d0c6a0fc052a738b8d463a4715229da Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Mon, 19 Sep 2016 00:35:16 +0300 Subject: [PATCH 39/68] Begining of histogram matching Signed-off-by: Nir Izraeli --- server/collab/vectors.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/server/collab/vectors.py b/server/collab/vectors.py index 030381ec7..a7771620c 100644 --- a/server/collab/vectors.py +++ b/server/collab/vectors.py @@ -1,3 +1,6 @@ +import numpy as np +import scipy as sp + from collections import defaultdict from models import Match @@ -54,7 +57,10 @@ class MnemonicHashVector(HashVector): class HistogramVector(Vector): @staticmethod def match(source, target): - pass + source_matrix = np.narray(source) + target_matrix = np.narray(target) + distances = sp.spatial.distance.cdist(source_matrix, target_matrix) + min_distances = distances.argmin(axis=0) class MnemonicHistogramVector(HistogramVector): From 9c71fc5d06b95001c554b8f65572437d76449ab6 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Tue, 20 Sep 2016 14:17:28 +0300 Subject: [PATCH 40/68] Add scipy and sklearn to requirements Signed-off-by: Nir Izraeli 
--- server/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/requirements.txt b/server/requirements.txt index a1b083ef3..e4a3778e3 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -1,5 +1,7 @@ django numpy +scipy +scikit-learn django-registration-redux djangorestframework django-rest-auth From 85b690e570008de3d67165505f95e3e2e57443d8 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Tue, 20 Sep 2016 14:21:37 +0300 Subject: [PATCH 41/68] Mark task as failed on exception (and reraise it) Signed-off-by: Nir Izraeli --- server/collab/tasks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 859aef9b8..f8e7a5439 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -17,7 +17,8 @@ def match(file_id, project_id): print("Running task {}".format(match.request.id)) # TODO: order might be important here - for vector_type in vectors.vector_list: + try: + for vector_type in vectors.vector_list: print(vector_type) vectors_filter = Vector.objects.filter(type=vector_type.vector_type) source_vectors = vectors_filter.filter(file_id=file_id) @@ -33,5 +34,8 @@ def match(file_id, project_id): print(list(matches)) task.update(progress=F('progress') + 1) + except Exception as ex: + task.update(status=Task.STATUS_FAILED, finished=now()) + raise ex task.update(status=Task.STATUS_DONE, finished=now()) From f3e9aac16aef2952fabbf1e0bd82df4504ec7bcd Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 21 Sep 2016 18:31:16 +0300 Subject: [PATCH 42/68] move Match object creation to tasks.py to avoid cyclic import Signed-off-by: Nir Izraeli --- server/collab/tasks.py | 11 +++++++---- server/collab/vectors.py | 6 ------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/server/collab/tasks.py b/server/collab/tasks.py index f8e7a5439..4b6867c7e 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -1,6 +1,6 @@ from django.utils.timezone 
import now from django.db.models import F -from models import Task, Vector +from models import Task, Vector, Match import vectors from celery import shared_task @@ -29,9 +29,12 @@ def match(file_id, project_id): print(target_vectors) print(source_vectors.all()) print(target_vectors.all()) - matches = vector_type.get_matches(source_vectors, target_vectors, - task.id) - print(list(matches)) + matches = vector_type.match(source_vectors, target_vectors, task.id) + match_objs = [Match(source, target, score=score, + type=vector_type.match_type) + for source, target, score in matches] + Match.objects.bulk_create(match_objs) + print(list(match_objs)) task.update(progress=F('progress') + 1) except Exception as ex: diff --git a/server/collab/vectors.py b/server/collab/vectors.py index a7771620c..4df954282 100644 --- a/server/collab/vectors.py +++ b/server/collab/vectors.py @@ -2,7 +2,6 @@ import scipy as sp from collections import defaultdict -from models import Match class Vector: @@ -11,11 +10,6 @@ def match(cls, source, target): raise NotImplementedError("Method match for vector type {} not " "implemented".format(cls)) - @classmethod - def get_matches(cls, source, target, task_id): - for source_id, target_id, score in cls.match(source, target): - yield Match(source_id, target_id, score=score, type=cls.match_type) - class DummyVector(Vector): match_type = 'dummy' From eb3320e0640b1043f2e11d5627bd6f27d46475a4 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 21 Sep 2016 19:19:31 +0300 Subject: [PATCH 43/68] Add miniconda install to travis when testing server Signed-off-by: Nir Izraeli --- .travis.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.travis.yml b/.travis.yml index 5844cba7a..9c0a0de91 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,6 +28,19 @@ before_install: - git submodule update --init --recursive install: + - if [ ${PROJECT} = "server" ]; then + set +vx; + if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then + wget 
https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; + else + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; + fi; + bash ./miniconda.sh -b -p ${HOME}/miniconda; + export PATH=${HOME}/miniconda/bin:$PATH; + conda info -a; + conda update --yes conda; + conda install --yes python=${TRAVIS_PYTHON_VERSION} atlas numpy scipy scikit-learn; + fi; - if [ -f ${PROJECT}/requirements.txt ]; then pip install -r ${PROJECT}/requirements.txt ; fi - pip install flake8 From 5e2454389ee83a6ec3d1030d1337623c61ec0eec Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 23 Sep 2016 01:09:08 +0300 Subject: [PATCH 44/68] finish HistogramVector.match Signed-off-by: Nir Izraeli --- server/collab/vectors.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/server/collab/vectors.py b/server/collab/vectors.py index 4df954282..23e689f17 100644 --- a/server/collab/vectors.py +++ b/server/collab/vectors.py @@ -3,6 +3,8 @@ from collections import defaultdict +from sklearn.preprocessing import normalize + class Vector: @classmethod @@ -51,10 +53,17 @@ class MnemonicHashVector(HashVector): class HistogramVector(Vector): @staticmethod def match(source, target): - source_matrix = np.narray(source) - target_matrix = np.narray(target) + source_id, source_data = source.values('id', 'data') + target_id, target_data = target.values('id', 'data') + source_matrix = normalize(np.narray(source_data), axis=1, norm='l1') + target_matrix = normalize(np.narray(target_data), axis=1, norm='l1') distances = sp.spatial.distance.cdist(source_matrix, target_matrix) - min_distances = distances.argmin(axis=0) + for source_i in range(source_matrix.shape[0]): + for target_i in range(target_matrix.shape[0]): + source_id = source_id[source_i] + target_id = target_id[target_i] + score = distances[source_i][target_i] + yield source_id, target_id, score class MnemonicHistogramVector(HistogramVector): From 
60b8e11d93c2d2d8a7af28f838af4bb8e60c5d84 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 23 Sep 2016 01:14:30 +0300 Subject: [PATCH 45/68] Remove python 3.2 from travis, fix codacy issues Conda doesn't support python 3.2 Signed-off-by: Nir Izraeli --- .travis.yml | 3 --- server/collab/vectors.py | 6 ++++-- server/collab/views.py | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9c0a0de91..8722c33e8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: python python: - "2.7" - - "3.2" - "3.4" - "3.5" env: @@ -10,8 +9,6 @@ env: matrix: # idaplugin is run with python2.7, we'll check against python3 to assert compatibility exclude: - - python: "3.2" - env: PROJECT=idaplugin - python: "3.4" env: PROJECT=idaplugin diff --git a/server/collab/vectors.py b/server/collab/vectors.py index 23e689f17..1357c30b9 100644 --- a/server/collab/vectors.py +++ b/server/collab/vectors.py @@ -17,8 +17,10 @@ class DummyVector(Vector): match_type = 'dummy' name = 'Dummy' - @classmethod - def match(cls, soure, target): + @staticmethod + def match(source, target): + del source + del target return [] diff --git a/server/collab/views.py b/server/collab/views.py index 14007c137..2c65c8a3e 100644 --- a/server/collab/views.py +++ b/server/collab/views.py @@ -63,6 +63,6 @@ class VectorViewSet(ViewSetManyAllowedMixin, viewsets.ModelViewSet): serializer_class = VectorSerializer permission_classes = (permissions.IsAuthenticatedOrReadOnly,) - def perform_create(self, serializer): - del self + @staticmethod + def perform_create(serializer): serializer.save(file=serializer.validated_data['instance'].file) From afd5b63c12d7feff8117f52ec8224cf9da624dfe Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sat, 24 Sep 2016 00:10:39 +0300 Subject: [PATCH 46/68] Better seperation between server matching algos and database vectors Signed-off-by: Nir Izraeli --- server/collab/{vectors.py => matches.py} | 25 ++++++++++-------------- 
server/collab/models.py | 14 +++++++++++-- server/collab/tasks.py | 19 ++++++++---------- 3 files changed, 30 insertions(+), 28 deletions(-) rename server/collab/{vectors.py => matches.py} (79%) diff --git a/server/collab/vectors.py b/server/collab/matches.py similarity index 79% rename from server/collab/vectors.py rename to server/collab/matches.py index 1357c30b9..3eab28648 100644 --- a/server/collab/vectors.py +++ b/server/collab/matches.py @@ -6,16 +6,15 @@ from sklearn.preprocessing import normalize -class Vector: +class Match: @classmethod def match(cls, source, target): raise NotImplementedError("Method match for vector type {} not " "implemented".format(cls)) -class DummyVector(Vector): +class DummyMatch(Match): match_type = 'dummy' - name = 'Dummy' @staticmethod def match(source, target): @@ -24,7 +23,7 @@ def match(source, target): return [] -class HashVector(Vector): +class HashMatch(Match): @classmethod def match(cls, source, target): # unique_values = set(source_dict.values()) @@ -40,19 +39,17 @@ def match(cls, source, target): yield source_id, target_id, 100 -class AssemblyHashVector(HashVector): +class AssemblyHashMatch(HashMatch): vector_type = 'assembly_hash' match_type = 'assembly_hash' - name = 'Assembly Hash' -class MnemonicHashVector(HashVector): +class MnemonicHashMatch(HashMatch): vector_type = 'mnemonic_hash' match_type = 'mnemonic_hash' - name = 'Mnemonic Hash' -class HistogramVector(Vector): +class HistogramMatch(Match): @staticmethod def match(source, target): source_id, source_data = source.values('id', 'data') @@ -68,16 +65,14 @@ def match(source, target): yield source_id, target_id, score -class MnemonicHistogramVector(HistogramVector): +class MnemonicHistogramMatch(HistogramMatch): vector_type = 'mnemonic_hist' match_type = 'mnemonic_hist' - name = 'Mnemonic Histogram' -class OpcodeHistogramVector(HistogramVector): +class OpcodeHistogramMatch(HistogramMatch): vector_type = 'opcode_histogram' match_type = 'opcode_histogram' - name = 
'Opcode Histogram' -vector_list = [DummyVector, AssemblyHashVector, MnemonicHashVector, - MnemonicHistogramVector, OpcodeHistogramVector] +match_list = [DummyMatch, AssemblyHashMatch, MnemonicHashMatch, + MnemonicHistogramMatch, OpcodeHistogramMatch] diff --git a/server/collab/models.py b/server/collab/models.py index 6a1da61e3..145c5cd23 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -3,7 +3,6 @@ from django.contrib.auth.models import User from django.core.validators import MinLengthValidator from collab.validators import IdbValidator -from collab import vectors class Project(models.Model): @@ -61,7 +60,18 @@ def __unicode__(self): class Vector(models.Model): - TYPE_CHOICES = [(vector.id, vector.name) for vector in vectors.vector_list] + DUMMY = 'dummy' + TYPE_HASH = 'hash' + TYPE_ASSEMBLY_HASH = 'assembly_hash' + TYPE_MNEMONIC_HASH = 'mnemonic_hash' + TYPE_MNEMONIC_HIST = 'mnemonic_hist' + TYPE_OPCODE_HIST = 'opcode_histogram' + TYPE_CHOICES = ((DUMMY, "Dummy"), + (TYPE_HASH, "Hash"), + (TYPE_ASSEMBLY_HASH, "Assembly Hash"), + (TYPE_MNEMONIC_HASH, "Mnemonic Hash"), + (TYPE_MNEMONIC_HIST, "Mnemonic Hist"), + (TYPE_OPCODE_HIST, "Opcode Histogram")) instance = models.ForeignKey(Instance, related_name='vectors') file = models.ForeignKey(File, related_name='vectors') diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 4b6867c7e..58ee23ca7 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -1,26 +1,23 @@ from django.utils.timezone import now from django.db.models import F from models import Task, Vector, Match -import vectors +import matches from celery import shared_task @shared_task def match(file_id, project_id): - # doing some preperations - vector_types = [t[0] for t in Vector.TYPE_CHOICES] - # recording the task has started task = Task.objects.filter(task_id=match.request.id) - task.update(status=Task.STATUS_STARTED, progress_max=len(vector_types)) + task.update(status=Task.STATUS_STARTED, 
progress_max=len(matches.match_list)) print("Running task {}".format(match.request.id)) # TODO: order might be important here try: - for vector_type in vectors.vector_list: - print(vector_type) - vectors_filter = Vector.objects.filter(type=vector_type.vector_type) + for match_type in matches.match_list: + print(match_type) + vectors_filter = Vector.objects.filter(type=match_type.vector_type) source_vectors = vectors_filter.filter(file_id=file_id) if project_id: target_vectors = vectors_filter.filter(file_id__project_id=project_id) @@ -29,10 +26,10 @@ def match(file_id, project_id): print(target_vectors) print(source_vectors.all()) print(target_vectors.all()) - matches = vector_type.match(source_vectors, target_vectors, task.id) + match_results = match_type.match(source_vectors, target_vectors, task.id) match_objs = [Match(source, target, score=score, - type=vector_type.match_type) - for source, target, score in matches] + type=match_type.match_type) + for source, target, score in match_results] Match.objects.bulk_create(match_objs) print(list(match_objs)) From 144e51a31dd1dc55f52709e0172bc56befda11fe Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sat, 24 Sep 2016 01:35:46 +0300 Subject: [PATCH 47/68] Fix new action/dialog related issues introduced in 6913865 Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 41 +++++++++++++----------------- idaplugin/rematch/dialogs/match.py | 8 +++--- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index b3058326b..e01d1108a 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -12,59 +12,52 @@ class MatchAction(base.BoundFileAction): name = "&Match" + dialog = MatchDialog def __init__(self, *args, **kwargs): super(MatchAction, self).__init__(*args, **kwargs) self.function_gen = None self.pbar = None self.timer = None - self.source = None - self.target = None - self.methods = [] 
self.task_id = None self.pbar = None self.instance_set = [] - def get_functions(self): - if self.source == 'idb': + @staticmethod + def get_functions(source): + if source == 'idb': return idautils.Functions() - elif self.source == 'user': + elif source == 'user': raise NotImplementedError("All user functions are not currently " "supported as source value.") - elif self.source == 'single': + elif source == 'single': func = idaapi.choose_func("Choose function to match with database", idc.ScreenEA()) if not func: return None return [func.startEA] - elif self.source == 'range': + elif source == 'range': raise NotImplementedError("Range of addresses is not currently " "supported as source value.") raise ValueError("Invalid source value received from MatchDialog: {}" - "".format(self.source)) + "".format(source)) - def get_functions_count(self): - return len(list(self.get_functions())) + @classmethod + def get_functions_count(cls, source): + return len(list(cls.get_functions(source))) - def activate(self, ctx): - dialog = MatchDialog() - data, _, result = dialog.get() - - if result is None: - return - - self.source, self.target, self.methods = data - - function_gen = self.get_functions() + def submit_handler(self, source, target, methods): + # TODO: actually use target and methods + function_gen = self.get_functions(source) if not function_gen: - return + return False self.function_gen = enumerate(function_gen) self.pbar = QtWidgets.QProgressDialog() self.pbar.setLabelText("Processing IDB... 
You may continue working,\nbut " "please avoid making any ground-breaking changes.") - self.pbar.setRange(0, self.get_functions_count()) + self.pbar.setRange(0, self.get_functions_count(source)) self.pbar.setValue(0) self.pbar.canceled.connect(self.cancel_upload) self.pbar.accepted.connect(self.accepted_upload) @@ -73,6 +66,8 @@ def activate(self, ctx): self.timer.timeout.connect(self.perform_upload) self.timer.start(0) + return True + def perform_upload(self): try: i, offset = self.function_gen.next() diff --git a/idaplugin/rematch/dialogs/match.py b/idaplugin/rematch/dialogs/match.py index c7df5df11..b88549b29 100755 --- a/idaplugin/rematch/dialogs/match.py +++ b/idaplugin/rematch/dialogs/match.py @@ -37,11 +37,9 @@ def __init__(self, **kwargs): methodGbx.setLayout(methodLyt) self.base_layout.addWidget(methodGbx) - self.bottom_layout(self.accept, "&Start matching") + self.bottom_layout("&Start matching") def data(self): - source = self.get_radio_result(self.sourceGrp) - target = self.get_radio_result(self.targetGrp) methods = [] if self.identity.isChecked(): methods.append('identity') @@ -50,4 +48,6 @@ def data(self): if self.graph.isChecked(): methods.append('graph') - return source, target, methods + return {'source': self.get_radio_result(self.sourceGrp), + 'target': self.get_radio_result(self.targetGrp), + 'methods': methods} From 547b62f5e29d0acfc06c269363adc973e612f16b Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sun, 25 Sep 2016 18:48:48 +0300 Subject: [PATCH 48/68] Ditch my through replacement attempt, make matching flow work Signed-off-by: Nir Izraeli --- server/collab/matches.py | 45 ++++++++++++++++++++-------------------- server/collab/models.py | 14 +++++++++++-- server/collab/tasks.py | 21 +++++++++++-------- 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/server/collab/matches.py b/server/collab/matches.py index 3eab28648..e0e0c0b59 100644 --- a/server/collab/matches.py +++ b/server/collab/matches.py @@ -1,7 +1,8 @@ import numpy 
as np import scipy as sp -from collections import defaultdict +import collections +import itertools from sklearn.preprocessing import normalize @@ -13,30 +14,22 @@ def match(cls, source, target): "implemented".format(cls)) -class DummyMatch(Match): - match_type = 'dummy' - - @staticmethod - def match(source, target): - del source - del target - return [] - - class HashMatch(Match): @classmethod def match(cls, source, target): # unique_values = set(source_dict.values()) - flipped_rest = defaultdict(list) + flipped_rest = collections.defaultdict(list) # TODO: could be optimized by enumerating all identity matchs together - for target_id, target_data in target.values_list('id', 'data').iterator(): + target_values = target.values_list('id', 'instance_id', 'data').iterator() + for target_id, target_instance_id, target_data in target_values: # TODO: could be optimized by uncommenting next line as most 'target' # values won't be present in 'source' list # if v in unique_values: - flipped_rest[target_data].append(target_id) - for source_id, source_data in source.values_list('id', 'data').iterator(): - for target_id in flipped_rest.get(source_data, ()): - yield source_id, target_id, 100 + flipped_rest[target_data].append(target_id, target_instance_id) + source_values = source.values_list('id', 'instance_id', 'data').iterator() + for source_id, source_instance_id, source_data in source_values: + for target_id, target_instance_id in flipped_rest.get(source_data, ()): + yield source_id, source_instance_id, target_id, target_instance_id, 100 class AssemblyHashMatch(HashMatch): @@ -52,8 +45,13 @@ class MnemonicHashMatch(HashMatch): class HistogramMatch(Match): @staticmethod def match(source, target): - source_id, source_data = source.values('id', 'data') - target_id, target_data = target.values('id', 'data') + source_values = itertools.izip(*source.values('id', 'instance_id', 'data')) + target_values = itertools.izip(*target.values('id', 'instance_id', 'data')) + + if not 
source_values or not target_values: + return + source_id, source_instance_id, source_data = source_values + target_id, target_instance_id, target_data = target_values source_matrix = normalize(np.narray(source_data), axis=1, norm='l1') target_matrix = normalize(np.narray(target_data), axis=1, norm='l1') distances = sp.spatial.distance.cdist(source_matrix, target_matrix) @@ -61,8 +59,11 @@ def match(source, target): for target_i in range(target_matrix.shape[0]): source_id = source_id[source_i] target_id = target_id[target_i] + source_instance_id = source_instance_id[source_i] + target_instance_id = target_instance_id[target_i] score = distances[source_i][target_i] - yield source_id, target_id, score + yield (source_id, source_instance_id, target_id, target_instance_id, + score) class MnemonicHistogramMatch(HistogramMatch): @@ -74,5 +75,5 @@ class OpcodeHistogramMatch(HistogramMatch): vector_type = 'opcode_histogram' match_type = 'opcode_histogram' -match_list = [DummyMatch, AssemblyHashMatch, MnemonicHashMatch, - MnemonicHistogramMatch, OpcodeHistogramMatch] +match_list = [AssemblyHashMatch, MnemonicHashMatch, MnemonicHistogramMatch, + OpcodeHistogramMatch] diff --git a/server/collab/models.py b/server/collab/models.py index 145c5cd23..064cb33d5 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -51,7 +51,8 @@ class Instance(models.Model): type = models.CharField(max_length=16, choices=TYPE_CHOICES) offset = models.BigIntegerField() - matches = models.ManyToManyField('self', symmetrical=True) + matches = models.ManyToManyField('self', symmetrical=False, through='Match', + related_name='related_to+') def __unicode__(self): return "{} instance {} at {}".format(self.get_type_display(), self.offset, @@ -79,6 +80,9 @@ class Vector(models.Model): type_version = models.IntegerField() data = models.TextField() + matches = models.ManyToManyField('self', symmetrical=False, through='Match', + related_name='related_to+') + def __unicode__(self): return "{} 
vector version {} for {}".format(self.get_type_display(), self.type_version, @@ -123,7 +127,13 @@ class Task(models.Model): progress_max = models.PositiveSmallIntegerField(default=0) -class Match(Instance.matches.through()): +class Match(models.Model): + from_vector = models.ForeignKey(Vector, related_name='from_vector') + to_vector = models.ForeignKey(Vector, related_name='to_vector') + + from_instance = models.ForeignKey(Instance, related_name='from_instance') + to_instance = models.ForeignKey(Instance, related_name='to_instance') + task = models.ForeignKey(Task, db_index=True, related_name='matches') type = models.CharField(max_length=16, choices=Vector.TYPE_CHOICES) diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 58ee23ca7..f968af3ed 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -19,23 +19,26 @@ def match(file_id, project_id): print(match_type) vectors_filter = Vector.objects.filter(type=match_type.vector_type) source_vectors = vectors_filter.filter(file_id=file_id) + target_vectors = vectors_filter if project_id: - target_vectors = vectors_filter.filter(file_id__project_id=project_id) + target_vectors = target_vectors.filter(file_id__project_id=project_id) target_vectors = target_vectors.exclude(file_id=file_id) print(source_vectors) print(target_vectors) print(source_vectors.all()) print(target_vectors.all()) - match_results = match_type.match(source_vectors, target_vectors, task.id) - match_objs = [Match(source, target, score=score, - type=match_type.match_type) - for source, target, score in match_results] - Match.objects.bulk_create(match_objs) - print(list(match_objs)) + if source_vectors.count() and target_vectors.count(): + match_results = match_type.match(source_vectors, target_vectors) + match_objs = [Match(source, target, score=score, + type=match_type.match_type) + for source, target, score in match_results] + print(type(Match)) + Match.objects.bulk_create(match_objs) + print(list(match_objs)) 
task.update(progress=F('progress') + 1) - except Exception as ex: + except Exception: task.update(status=Task.STATUS_FAILED, finished=now()) - raise ex + raise task.update(status=Task.STATUS_DONE, finished=now()) From f529d397874db0e2ac9c867dbd16e80930a5a951 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sun, 25 Sep 2016 18:49:49 +0300 Subject: [PATCH 49/68] make task progress bar visible without requiring actual progress Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 1 + 1 file changed, 1 insertion(+) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index e01d1108a..439fd5c58 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -117,6 +117,7 @@ def accepted_upload(self): self.pbar.setValue(int(r['progress'])) self.pbar.canceled.connect(self.cancel_task) self.pbar.accepted.connect(self.accepted_task) + self.pbar.show() self.timer = QtCore.QTimer() self.timer.timeout.connect(self.perform_task) From db2b09b31dd4fb200443568dffde94ded3b833a8 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 28 Sep 2016 12:52:34 +0300 Subject: [PATCH 50/68] more progress on making histogram matches work Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 3 +++ server/collab/matches.py | 21 ++++++++++++++++++--- server/collab/tasks.py | 23 +++++++++++++---------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 439fd5c58..5d6653dc1 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -130,11 +130,14 @@ def perform_task(self): progress_max = int(r['progress_max']) progress = int(r['progress']) + status = r['status'] self.pbar.setRange(0, progress_max) self.pbar.setValue(progress) if progress >= progress_max: self.pbar.accept() + elif status == 'failed': + self.pbar.reject() except Exception: self.cancel_task() raise diff --git 
a/server/collab/matches.py b/server/collab/matches.py index e0e0c0b59..1b91e86bb 100644 --- a/server/collab/matches.py +++ b/server/collab/matches.py @@ -3,6 +3,7 @@ import collections import itertools +import json from sklearn.preprocessing import normalize @@ -25,7 +26,7 @@ def match(cls, source, target): # TODO: could be optimized by uncommenting next line as most 'target' # values won't be present in 'source' list # if v in unique_values: - flipped_rest[target_data].append(target_id, target_instance_id) + flipped_rest[target_data].append((target_id, target_instance_id)) source_values = source.values_list('id', 'instance_id', 'data').iterator() for source_id, source_instance_id, source_data in source_values: for target_id, target_instance_id in flipped_rest.get(source_data, ()): @@ -52,8 +53,22 @@ def match(source, target): return source_id, source_instance_id, source_data = source_values target_id, target_instance_id, target_data = target_values - source_matrix = normalize(np.narray(source_data), axis=1, norm='l1') - target_matrix = normalize(np.narray(target_data), axis=1, norm='l1') + try: + source_data = [json.loads(d) for d in source_data] + target_data = [json.loads(d) for d in target_data] + except Exception as ex: + print(ex) + print(d) + raise + print(source_data) + source_data = np.array(source_data) + target_data = np.array(target_data) + print(type(source_data)) + print(source_data) + print(source_data.shape) + print(source_data.dtype) + source_matrix = normalize(source_data, axis=1, norm='l1') + target_matrix = normalize(target_data, axis=1, norm='l1') distances = sp.spatial.distance.cdist(source_matrix, target_matrix) for source_i in range(source_matrix.shape[0]): for target_i in range(target_matrix.shape[0]): diff --git a/server/collab/tasks.py b/server/collab/tasks.py index f968af3ed..3e7a1210e 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -10,6 +10,7 @@ def match(file_id, project_id): # recording the task has started task = 
Task.objects.filter(task_id=match.request.id) + task_id = task.only('id')[0].id task.update(status=Task.STATUS_STARTED, progress_max=len(matches.match_list)) print("Running task {}".format(match.request.id)) @@ -23,18 +24,10 @@ def match(file_id, project_id): if project_id: target_vectors = target_vectors.filter(file_id__project_id=project_id) target_vectors = target_vectors.exclude(file_id=file_id) - print(source_vectors) - print(target_vectors) - print(source_vectors.all()) - print(target_vectors.all()) if source_vectors.count() and target_vectors.count(): - match_results = match_type.match(source_vectors, target_vectors) - match_objs = [Match(source, target, score=score, - type=match_type.match_type) - for source, target, score in match_results] - print(type(Match)) + match_objs = gen_match_objs(task_id, match_type, source_vectors, + target_vectors) Match.objects.bulk_create(match_objs) - print(list(match_objs)) task.update(progress=F('progress') + 1) except Exception: @@ -42,3 +35,13 @@ def match(file_id, project_id): raise task.update(status=Task.STATUS_DONE, finished=now()) + + +def gen_match_objs(task_id, match_type, source_vectors, target_vectors): + matches = match_type.match(source_vectors, target_vectors) + for source, source_instance, target, target_instance, score in matches: + mat = Match(task_id=task_id, from_vector_id=source, to_vector_id=target, + from_instance_id=source_instance, + to_instance_id=target_instance, + score=score, type=match_type.match_type) + yield mat From 775bd416b9fc1a731b48a427d819f35686eeb335 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 5 Oct 2016 05:08:52 +0300 Subject: [PATCH 51/68] Time each match type Signed-off-by: Nir Izraeli --- server/collab/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 3e7a1210e..df94a4c90 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -18,6 +18,7 @@ def match(file_id, project_id): try: for match_type in 
matches.match_list: print(match_type) + start = now() vectors_filter = Vector.objects.filter(type=match_type.vector_type) source_vectors = vectors_filter.filter(file_id=file_id) target_vectors = vectors_filter @@ -28,6 +29,7 @@ def match(file_id, project_id): match_objs = gen_match_objs(task_id, match_type, source_vectors, target_vectors) Match.objects.bulk_create(match_objs) + print("\tTook: {}".format(now() - start)) task.update(progress=F('progress') + 1) except Exception: From fbc8078a84c1fabf8abaefc7be0d5b98f9d8ab38 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 5 Oct 2016 05:21:34 +0300 Subject: [PATCH 52/68] set Task model progress_max nullable and handle optional None value in task progress bar Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 13 +++++++------ server/collab/models.py | 2 +- server/collab/tasks.py | 3 ++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 5d6653dc1..d1607bfae 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -113,7 +113,7 @@ def accepted_upload(self): self.pbar = QtWidgets.QProgressDialog() self.pbar.setLabelText("Waiting for remote matching... 
You may continue " "working without any limitations.") - self.pbar.setRange(0, int(r['progress_max'])) + self.pbar.setRange(0, int(r['progress_max']) if r['progress_max'] else 0) self.pbar.setValue(int(r['progress'])) self.pbar.canceled.connect(self.cancel_task) self.pbar.accepted.connect(self.accepted_task) @@ -128,15 +128,16 @@ def perform_task(self): r = network.query("GET", "collab/tasks/{}/".format(self.task_id), json=True) - progress_max = int(r['progress_max']) + progress_max = int(r['progress_max']) if r['progress_max'] else None progress = int(r['progress']) status = r['status'] - self.pbar.setRange(0, progress_max) + if progress_max: + self.pbar.setMaximum(progress_max) + if progress >= progress_max: + self.pbar.accept() self.pbar.setValue(progress) - if progress >= progress_max: - self.pbar.accept() - elif status == 'failed': + if status == 'failed': self.pbar.reject() except Exception: self.cancel_task() diff --git a/server/collab/models.py b/server/collab/models.py index 064cb33d5..d75b91b6f 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -124,7 +124,7 @@ class Task(models.Model): file = models.ForeignKey(File, related_name='tasks') progress = models.PositiveSmallIntegerField(default=0) - progress_max = models.PositiveSmallIntegerField(default=0) + progress_max = models.PositiveSmallIntegerField(null=True) class Match(models.Model): diff --git a/server/collab/tasks.py b/server/collab/tasks.py index df94a4c90..acda93e27 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -11,7 +11,8 @@ def match(file_id, project_id): # recording the task has started task = Task.objects.filter(task_id=match.request.id) task_id = task.only('id')[0].id - task.update(status=Task.STATUS_STARTED, progress_max=len(matches.match_list)) + task.update(status=Task.STATUS_STARTED, progress=0, + progress_max=len(matches.match_list)) print("Running task {}".format(match.request.id)) # TODO: order might be important here From 
072ccc91a397a77ad94c2fedda1e2e0ab898fa97 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 5 Oct 2016 05:39:44 +0300 Subject: [PATCH 53/68] more fixes to histogram match backend Signed-off-by: Nir Izraeli --- server/collab/matches.py | 60 +++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/server/collab/matches.py b/server/collab/matches.py index 1b91e86bb..dd1a62a4a 100644 --- a/server/collab/matches.py +++ b/server/collab/matches.py @@ -1,11 +1,11 @@ -import numpy as np -import scipy as sp - import collections import itertools import json -from sklearn.preprocessing import normalize +import sklearn as skl +import sklearn.preprocessing +import sklearn.feature_extraction +import sklearn.metrics class Match: @@ -46,36 +46,32 @@ class MnemonicHashMatch(HashMatch): class HistogramMatch(Match): @staticmethod def match(source, target): - source_values = itertools.izip(*source.values('id', 'instance_id', 'data')) - target_values = itertools.izip(*target.values('id', 'instance_id', 'data')) - - if not source_values or not target_values: - return - source_id, source_instance_id, source_data = source_values - target_id, target_instance_id, target_data = target_values - try: - source_data = [json.loads(d) for d in source_data] - target_data = [json.loads(d) for d in target_data] - except Exception as ex: - print(ex) - print(d) - raise - print(source_data) - source_data = np.array(source_data) - target_data = np.array(target_data) - print(type(source_data)) - print(source_data) - print(source_data.shape) - print(source_data.dtype) - source_matrix = normalize(source_data, axis=1, norm='l1') - target_matrix = normalize(target_data, axis=1, norm='l1') - distances = sp.spatial.distance.cdist(source_matrix, target_matrix) + source_values = itertools.izip(*source.values_list('id', 'instance_id', + 'data')) + target_values = itertools.izip(*target.values_list('id', 'instance_id', + 'data')) + + source_ids, source_instance_ids, 
source_data = source_values + target_ids, target_instance_ids, target_data = target_values + dictvect = skl.feature_extraction.DictVectorizer() + source_data = dictvect.fit_transform([json.loads(d) for d in source_data]) + target_data = dictvect.transform([json.loads(d) for d in target_data]) + source_matrix = skl.preprocessing.normalize(source_data, axis=1, norm='l1') + target_matrix = skl.preprocessing.normalize(target_data, axis=1, norm='l1') + print(type(source_matrix)) + print(source_matrix.shape) + print(type(target_matrix)) + print(target_matrix.shape) + distances = skl.metrics.pairwise.pairwise_distances(source_matrix, + target_matrix) + print(type(distances)) + print(distances.shape) for source_i in range(source_matrix.shape[0]): for target_i in range(target_matrix.shape[0]): - source_id = source_id[source_i] - target_id = target_id[target_i] - source_instance_id = source_instance_id[source_i] - target_instance_id = target_instance_id[target_i] + source_id = source_ids[source_i] + target_id = target_ids[target_i] + source_instance_id = source_instance_ids[source_i] + target_instance_id = target_instance_ids[target_i] score = distances[source_i][target_i] yield (source_id, source_instance_id, target_id, target_instance_id, score) From d8edd71575638f66b1033a16e022e4d8ff46046b Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sun, 16 Oct 2016 14:50:06 -0700 Subject: [PATCH 54/68] Another attempt at speeding up the matches process Signed-off-by: Nir Izraeli --- server/collab/matches.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/server/collab/matches.py b/server/collab/matches.py index dd1a62a4a..2ab2a24f4 100644 --- a/server/collab/matches.py +++ b/server/collab/matches.py @@ -2,10 +2,10 @@ import itertools import json +import numpy as np import sklearn as skl -import sklearn.preprocessing -import sklearn.feature_extraction -import sklearn.metrics +import sklearn.preprocessing # noqa flake8 importing as a 
different name +import sklearn.feature_extraction # noqa flake8 importing as a different name class Match: @@ -19,6 +19,7 @@ class HashMatch(Match): @classmethod def match(cls, source, target): # unique_values = set(source_dict.values()) + print(source.count(), target.count()) flipped_rest = collections.defaultdict(list) # TODO: could be optimized by enumerating all identity matchs together target_values = target.values_list('id', 'instance_id', 'data').iterator() @@ -62,17 +63,18 @@ def match(source, target): print(source_matrix.shape) print(type(target_matrix)) print(target_matrix.shape) - distances = skl.metrics.pairwise.pairwise_distances(source_matrix, - target_matrix) - print(type(distances)) - print(distances.shape) for source_i in range(source_matrix.shape[0]): + source_vector = source_matrix[source_i].toarray() + source_id = source_ids[source_i] + source_instance_id = source_instance_ids[source_i] + print(source_i) + for target_i in range(target_matrix.shape[0]): - source_id = source_ids[source_i] + target_vector = target_matrix[target_i].toarray() target_id = target_ids[target_i] - source_instance_id = source_instance_ids[source_i] target_instance_id = target_instance_ids[target_i] - score = distances[source_i][target_i] + + score = np.linalg.norm(source_vector - target_vector) yield (source_id, source_instance_id, target_id, target_instance_id, score) From 6633342f33cdcc99c5727cbb06799f55d162f469 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sun, 16 Oct 2016 17:22:15 -0700 Subject: [PATCH 55/68] more robust status handling in task progress query Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index d1607bfae..2f568f60c 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -131,14 +131,14 @@ def perform_task(self): progress_max = 
int(r['progress_max']) if r['progress_max'] else None progress = int(r['progress']) status = r['status'] - if progress_max: + if status == 'failed': + self.pbar.reject() + elif progress_max: self.pbar.setMaximum(progress_max) if progress >= progress_max: self.pbar.accept() - self.pbar.setValue(progress) - - if status == 'failed': - self.pbar.reject() + else: + self.pbar.setValue(progress) except Exception: self.cancel_task() raise From e1bb374336fcc5dd0e91183a7d907de80b03a2a6 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Tue, 18 Oct 2016 17:50:23 -0700 Subject: [PATCH 56/68] send more parameters in a match task creation Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 2f568f60c..457a1add0 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -48,6 +48,10 @@ def get_functions_count(cls, source): return len(list(cls.get_functions(source))) def submit_handler(self, source, target, methods): + self.source = source + self.target = target + self.methods = methods + # TODO: actually use target and methods function_gen = self.get_functions(source) if not function_gen: @@ -106,7 +110,7 @@ def accepted_upload(self): self.cancel_upload() params = {'action': 'commit', 'file': netnode.bound_file_id, - 'project': None} + 'project': None, 'target': self.target, 'matches': self.matches} r = network.query("POST", "collab/tasks/", params=params, json=True) self.task_id = r['id'] From 7718f3962418977a08e7957ed78c4b5f1e25f490 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 21 Oct 2016 16:39:35 -0700 Subject: [PATCH 57/68] More adjustments to the source/target/methods parameters Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 27 +++++++++++++-------------- server/collab/models.py | 6 ++++-- server/collab/serializers.py | 6 ++---- 3 files changed, 19 
insertions(+), 20 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 457a1add0..8786fabbb 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -23,29 +23,27 @@ def __init__(self, *args, **kwargs): self.pbar = None self.instance_set = [] - @staticmethod - def get_functions(source): - if source == 'idb': + def get_functions(self): + if self.source == 'idb': return idautils.Functions() - elif source == 'user': + elif self.source == 'user': raise NotImplementedError("All user functions are not currently " "supported as source value.") - elif source == 'single': + elif self.source == 'single': func = idaapi.choose_func("Choose function to match with database", idc.ScreenEA()) if not func: return None return [func.startEA] - elif source == 'range': + elif self.source == 'range': raise NotImplementedError("Range of addresses is not currently " "supported as source value.") raise ValueError("Invalid source value received from MatchDialog: {}" - "".format(source)) + "".format(self.source)) - @classmethod - def get_functions_count(cls, source): - return len(list(cls.get_functions(source))) + def get_functions_count(self): + return len(list(self.get_functions())) def submit_handler(self, source, target, methods): self.source = source @@ -53,7 +51,7 @@ def submit_handler(self, source, target, methods): self.methods = methods # TODO: actually use target and methods - function_gen = self.get_functions(source) + function_gen = self.get_functions() if not function_gen: return False @@ -61,7 +59,7 @@ def submit_handler(self, source, target, methods): self.pbar = QtWidgets.QProgressDialog() self.pbar.setLabelText("Processing IDB... 
You may continue working,\nbut " "please avoid making any ground-breaking changes.") - self.pbar.setRange(0, self.get_functions_count(source)) + self.pbar.setRange(0, self.get_functions_count()) self.pbar.setValue(0) self.pbar.canceled.connect(self.cancel_upload) self.pbar.accepted.connect(self.accepted_upload) @@ -109,8 +107,9 @@ def cancel_upload(self): def accepted_upload(self): self.cancel_upload() - params = {'action': 'commit', 'file': netnode.bound_file_id, - 'project': None, 'target': self.target, 'matches': self.matches} + params = {'action': 'commit', 'source_file': netnode.bound_file_id, + 'target_project': None, 'target_file': None, + 'source': self.source, 'methods': self.methods} r = network.query("POST", "collab/tasks/", params=params, json=True) self.task_id = r['id'] diff --git a/server/collab/models.py b/server/collab/models.py index d75b91b6f..a70bebc44 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -120,8 +120,10 @@ class Task(models.Model): choices=STATUS_CHOICES) action = models.CharField(max_length=16, choices=ACTION_CHOICES) - project = models.ForeignKey(Project, null=True, related_name='tasks') - file = models.ForeignKey(File, related_name='tasks') + source_file = models.ForeignKey(File, related_name='source_tasks') + # TODO: make sure only at least one of target_file/target_project is null + target_file = models.ForeignKey(File, null=True) + target_project = models.ForeignKey(Project, null=True) progress = models.PositiveSmallIntegerField(default=0) progress_max = models.PositiveSmallIntegerField(null=True) diff --git a/server/collab/serializers.py b/server/collab/serializers.py index f2c727dcd..541ce0d88 100644 --- a/server/collab/serializers.py +++ b/server/collab/serializers.py @@ -31,13 +31,11 @@ class TaskSerializer(serializers.ModelSerializer): progress = serializers.ReadOnlyField() progress_max = serializers.ReadOnlyField() - project = serializers.PrimaryKeyRelatedField(queryset=Project.objects.all(), - 
allow_null=True) - class Meta: model = Task fields = ('id', 'task_id', 'created', 'finished', 'owner', 'status', - 'action', 'project', 'file', 'progress', 'progress_max') + 'action', 'target_project', 'target_file', 'source_file', + 'progress', 'progress_max') class InstanceSerializer(serializers.ModelSerializer): From 13b696fb0313cc2b24c482d8a3cd46ad3d51867e Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 21 Oct 2016 17:19:08 -0700 Subject: [PATCH 58/68] Make item selection combobox a dialog.Base method Signed-off-by: Nir Izraeli --- idaplugin/rematch/dialogs/base.py | 10 ++++++++++ idaplugin/rematch/dialogs/project.py | 7 +------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/idaplugin/rematch/dialogs/base.py b/idaplugin/rematch/dialogs/base.py index aec6f7414..1eab73901 100755 --- a/idaplugin/rematch/dialogs/base.py +++ b/idaplugin/rematch/dialogs/base.py @@ -44,6 +44,16 @@ def add_radio_group(self, title, *radios, **kwargs): return radiogroup + def create_item_select(self, item, allow_none=True): + response = network.query("GET", "collab/{}/".format(item), json=True) + combobox = QtWidgets.QComboBox() + for idx, obj in enumerate(response): + text = "{} ({})".format(obj['name'], obj['id']) + combobox.insertItem(idx, text, int(obj['id'])) + if allow_none: + combobox.insertItem(0, "None", None) + return combobox + def get_radio_result(self, group): group_ids = self.radio_groups[group] return group_ids[group.checkedId()] diff --git a/idaplugin/rematch/dialogs/project.py b/idaplugin/rematch/dialogs/project.py index e87201573..d32566706 100755 --- a/idaplugin/rematch/dialogs/project.py +++ b/idaplugin/rematch/dialogs/project.py @@ -54,12 +54,7 @@ def __init__(self, **kwargs): gridLyt.addWidget(QtWidgets.QLabel("Description:"), 2, 0) gridLyt.addWidget(QtWidgets.QLabel("MD5 hash:"), 3, 0) - response = network.query("GET", "collab/projects/", json=True) - self.projectCbb = QtWidgets.QComboBox() - for idx, project in enumerate(response): - text = 
"{} ({})".format(project['name'], project['id']) - self.projectCbb.insertItem(idx, text, int(project['id'])) - self.projectCbb.insertItem(0, "None", None) + self.projectCbb = self.create_item_select('projects') gridLyt.addWidget(self.projectCbb, 0, 1) self.nameTxt = QtWidgets.QLineEdit() From e489af686f3febecef3617b7c150b96bb4d897dd Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 21 Oct 2016 17:20:15 -0700 Subject: [PATCH 59/68] Add optional choice-specific additional input to the radio group creation function Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 4 ++++ idaplugin/rematch/dialogs/base.py | 25 +++++++++++++++++++------ idaplugin/rematch/dialogs/match.py | 22 ++++++++++++---------- idaplugin/rematch/dialogs/project.py | 1 - 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 8786fabbb..084db4b16 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -23,6 +23,10 @@ def __init__(self, *args, **kwargs): self.pbar = None self.instance_set = [] + self.source = None + self.target = None + self.methods = None + def get_functions(self): if self.source == 'idb': return idautils.Functions() diff --git a/idaplugin/rematch/dialogs/base.py b/idaplugin/rematch/dialogs/base.py index 1eab73901..1d854e827 100755 --- a/idaplugin/rematch/dialogs/base.py +++ b/idaplugin/rematch/dialogs/base.py @@ -20,31 +20,42 @@ def __init__(self, title="", reject_handler=None, submit_handler=None, self.base_layout = QtWidgets.QVBoxLayout() self.setLayout(self.base_layout) - def add_radio_group(self, title, *radios, **kwargs): + def create_radio_group(self, title, *radios, **kwargs): radiogroup = QtWidgets.QButtonGroup() groupbox = QtWidgets.QGroupBox(title) - layout = QtWidgets.QVBoxLayout() + layout = QtWidgets.QGridLayout() checked = kwargs.pop('checked', None) self.radio_groups[radiogroup] = [] for i, radio in enumerate(radios): - 
radio_name, radio_id = radio + radio_name, radio_id, radio_extra_controls = radio radio_widget = QtWidgets.QRadioButton(radio_name) + radiogroup.addButton(radio_widget, i) + layout.addWidget(radio_widget, i, 0) + if radio_extra_controls is not None: + layout.addWidget(radio_extra_controls, i, 1) + # if extra controller comes disabled, make sure it stays that way + # and also make the radio box disabled + if radio_extra_controls.isEnabled(): + radio_widget.toggled.connect(radio_extra_controls.setEnabled) + radio_extra_controls.setEnabled(False) + else: + radio_widget.setEnabled(False) + # if checked is supplied, set correct radio as checked # else set first radio as checked` if (checked is None and i == 0) or checked == radio_id: radio_widget.setChecked(True) - radiogroup.addButton(radio_widget, i) - layout.addWidget(radio_widget) self.radio_groups[radiogroup].append(radio_id) groupbox.setLayout(layout) self.base_layout.addWidget(groupbox) return radiogroup - def create_item_select(self, item, allow_none=True): + @staticmethod + def create_item_select(item, allow_none=True): response = network.query("GET", "collab/{}/".format(item), json=True) combobox = QtWidgets.QComboBox() for idx, obj in enumerate(response): @@ -52,6 +63,8 @@ def create_item_select(self, item, allow_none=True): combobox.insertItem(idx, text, int(obj['id'])) if allow_none: combobox.insertItem(0, "None", None) + elif combobox.count() == 0: + combobox.setEnabled(False) return combobox def get_radio_result(self, group): diff --git a/idaplugin/rematch/dialogs/match.py b/idaplugin/rematch/dialogs/match.py index b88549b29..4f721d135 100755 --- a/idaplugin/rematch/dialogs/match.py +++ b/idaplugin/rematch/dialogs/match.py @@ -11,16 +11,18 @@ class MatchDialog(base.BaseDialog): def __init__(self, **kwargs): super(MatchDialog, self).__init__(title="Match", **kwargs) - self.sourceGrp = self.add_radio_group("Match source", - ("Entire IDB", 'idb'), - ("User functions", 'user'), - ("Single function", 
'single'), - ("Range", 'range')) - - self.targetGrp = self.add_radio_group("Match target", - ("Entire DB", 'db'), - ("Project", 'project'), - ("Another file", 'file')) + choices = [("Entire IDB", 'idb', None), + ("User functions", 'user', None), + ("Single function", 'single', None), + ("Range", 'range', None)] + self.sourceGrp = self.create_radio_group("Match source", *choices) + + self.target_project = self.create_item_select('projects', allow_none=False) + self.target_file = self.create_item_select('files', allow_none=False) + choices = [("Entire DB", 'db', None), + ("Project", 'project', self.target_project), + ("Another file", 'file', self.target_file)] + self.targetGrp = self.create_radio_group("Match target", *choices) self.identity = QtWidgets.QCheckBox("Identify matches") self.fuzzy = QtWidgets.QCheckBox("Fuzzy matches") diff --git a/idaplugin/rematch/dialogs/project.py b/idaplugin/rematch/dialogs/project.py index d32566706..67ba070d5 100755 --- a/idaplugin/rematch/dialogs/project.py +++ b/idaplugin/rematch/dialogs/project.py @@ -4,7 +4,6 @@ from . import base from .. import netnode -from .. 
import network class AddProjectDialog(base.BaseDialog): From 27da9ff6294fcee9decb46b279d9cf1c9c599969 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 21 Oct 2016 17:44:18 -0700 Subject: [PATCH 60/68] Send selected target_project and target_file to server Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 10 ++++++++-- idaplugin/rematch/dialogs/match.py | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 084db4b16..f1e082150 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -25,6 +25,8 @@ def __init__(self, *args, **kwargs): self.source = None self.target = None + self.target_project = None + self.target_file = None self.methods = None def get_functions(self): @@ -49,9 +51,12 @@ def get_functions(self): def get_functions_count(self): return len(list(self.get_functions())) - def submit_handler(self, source, target, methods): + def submit_handler(self, source, target, target_project, target_file, + methods): self.source = source self.target = target + self.target_project = target_project if target == 'project' else None + self.target_file = target_file if target == 'file' else None self.methods = methods # TODO: actually use target and methods @@ -112,7 +117,8 @@ def accepted_upload(self): self.cancel_upload() params = {'action': 'commit', 'source_file': netnode.bound_file_id, - 'target_project': None, 'target_file': None, + 'target_project': self.target_project, + 'target_file': self.target_file, 'source': self.source, 'methods': self.methods} r = network.query("POST", "collab/tasks/", params=params, json=True) self.task_id = r['id'] diff --git a/idaplugin/rematch/dialogs/match.py b/idaplugin/rematch/dialogs/match.py index 4f721d135..11aff860d 100755 --- a/idaplugin/rematch/dialogs/match.py +++ b/idaplugin/rematch/dialogs/match.py @@ -52,4 +52,6 @@ def data(self): return {'source': 
self.get_radio_result(self.sourceGrp), 'target': self.get_radio_result(self.targetGrp), + 'target_project': self.target_project.currentData(), + 'target_file': self.target_file.currentData(), 'methods': methods} From 90ceb59bf69cc636245842740d5112132821d42f Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Fri, 21 Oct 2016 21:36:37 -0700 Subject: [PATCH 61/68] Handle target_project and target_file match parameters on backend plus some small code simplifications Signed-off-by: Nir Izraeli --- .flake8 | 2 +- server/collab/tasks.py | 23 +++++++++++++++-------- server/collab/views.py | 5 +---- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/.flake8 b/.flake8 index ec01eba50..9b70c3f9f 100644 --- a/.flake8 +++ b/.flake8 @@ -1,2 +1,2 @@ [flake8] -ignore = E111, E113, E114, E121, E125 +ignore = E111, E113, E114, E121, E125, E127 diff --git a/server/collab/tasks.py b/server/collab/tasks.py index acda93e27..028a7c659 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -7,25 +7,32 @@ @shared_task -def match(file_id, project_id): +def match(): # recording the task has started task = Task.objects.filter(task_id=match.request.id) - task_id = task.only('id')[0].id task.update(status=Task.STATUS_STARTED, progress=0, progress_max=len(matches.match_list)) + # get input parameters + task_id, source_file, target_project, target_file = \ + task.values('id', 'source_file_id', 'target_project_id', 'target_file_id') + + base_source_vectors = Vector.objects.filter(file_id=source_file) + base_target_vectors = Vector.objects.exclude(file_id=source_file) + if target_project: + base_target_vectors = base_target_vectors.filter(project_id=target_project) + if target_file: + base_target_vectors = base_target_vectors.filter(file_id=target_file) + print("Running task {}".format(match.request.id)) # TODO: order might be important here try: for match_type in matches.match_list: print(match_type) start = now() - vectors_filter = 
Vector.objects.filter(type=match_type.vector_type) - source_vectors = vectors_filter.filter(file_id=file_id) - target_vectors = vectors_filter - if project_id: - target_vectors = target_vectors.filter(file_id__project_id=project_id) - target_vectors = target_vectors.exclude(file_id=file_id) + source_vectors = base_source_vectors.filter(type=match_type.vector_type) + target_vectors = base_target_vectors.filter(type=match_type.vector_type) + if source_vectors.count() and target_vectors.count(): match_objs = gen_match_objs(task_id, match_type, source_vectors, target_vectors) diff --git a/server/collab/views.py b/server/collab/views.py index 2c65c8a3e..255ce00e1 100644 --- a/server/collab/views.py +++ b/server/collab/views.py @@ -45,10 +45,7 @@ class TaskViewSet(mixins.CreateModelMixin, mixins.RetrieveModelMixin, IsOwnerOrReadOnly) def perform_create(self, serializer): - file_id = serializer.validated_data['file'].id - project = serializer.validated_data['project'] - project_id = project.id if project else None - result = tasks.match.delay(file_id, project_id) + result = tasks.match.delay() serializer.save(owner=self.request.user, task_id=result.id) From 2c7bf08fac5f562eb1811a9b3fce1380dc7795ed Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 26 Oct 2016 11:07:48 -0700 Subject: [PATCH 62/68] Allow excuding values in create_item_select and exclude current file in target_file selection Signed-off-by: Nir Izraeli --- idaplugin/rematch/dialogs/base.py | 4 +++- idaplugin/rematch/dialogs/match.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/idaplugin/rematch/dialogs/base.py b/idaplugin/rematch/dialogs/base.py index 1d854e827..dd652a8a5 100755 --- a/idaplugin/rematch/dialogs/base.py +++ b/idaplugin/rematch/dialogs/base.py @@ -55,10 +55,12 @@ def create_radio_group(self, title, *radios, **kwargs): return radiogroup @staticmethod - def create_item_select(item, allow_none=True): + def create_item_select(item, allow_none=True, exclude=None): 
response = network.query("GET", "collab/{}/".format(item), json=True) combobox = QtWidgets.QComboBox() for idx, obj in enumerate(response): + if exclude and obj['name'] in exclude or obj['id'] in exclude: + continue text = "{} ({})".format(obj['name'], obj['id']) combobox.insertItem(idx, text, int(obj['id'])) if allow_none: diff --git a/idaplugin/rematch/dialogs/match.py b/idaplugin/rematch/dialogs/match.py index 11aff860d..2b6b87c2c 100755 --- a/idaplugin/rematch/dialogs/match.py +++ b/idaplugin/rematch/dialogs/match.py @@ -5,6 +5,7 @@ QtWidgets = QtGui from . import base +from .. import netnode class MatchDialog(base.BaseDialog): @@ -18,7 +19,8 @@ def __init__(self, **kwargs): self.sourceGrp = self.create_radio_group("Match source", *choices) self.target_project = self.create_item_select('projects', allow_none=False) - self.target_file = self.create_item_select('files', allow_none=False) + self.target_file = self.create_item_select('files', allow_none=False, + exclude=[netnode.bound_file_id]) choices = [("Entire DB", 'db', None), ("Project", 'project', self.target_project), ("Another file", 'file', self.target_file)] From 1b80518bce9c70976756788efb68c933024c341d Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 26 Oct 2016 11:18:02 -0700 Subject: [PATCH 63/68] import qt from idasix Signed-off-by: Nir Izraeli --- idaplugin/rematch/dialogs/match.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/idaplugin/rematch/dialogs/match.py b/idaplugin/rematch/dialogs/match.py index 2b6b87c2c..d05dd72ef 100755 --- a/idaplugin/rematch/dialogs/match.py +++ b/idaplugin/rematch/dialogs/match.py @@ -1,8 +1,4 @@ -try: - from PyQt5 import QtWidgets -except ImportError: - from PySide import QtGui - QtWidgets = QtGui +from ..idasix import QtWidgets from . import base from .. 
import netnode From 2d4521a75f99b7f5112601e8bb2cb9b719d1c8ea Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Wed, 26 Oct 2016 19:20:31 -0700 Subject: [PATCH 64/68] Add support for range and single function sources Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 47 ++++++++++------- idaplugin/rematch/dialogs/base.py | 81 ++++++++++++++++++++++++++++-- idaplugin/rematch/dialogs/match.py | 9 +++- server/collab/models.py | 3 ++ server/collab/serializers.py | 2 +- server/collab/tasks.py | 47 ++++++++++------- 6 files changed, 145 insertions(+), 44 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index f1e082150..10941223c 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -1,7 +1,5 @@ from ..idasix import QtCore, QtWidgets import idautils -import idaapi -import idc from ..dialogs.match import MatchDialog @@ -24,6 +22,8 @@ def __init__(self, *args, **kwargs): self.instance_set = [] self.source = None + self.source_single = None + self.source_range = None self.target = None self.target_project = None self.target_file = None @@ -31,44 +31,39 @@ def __init__(self, *args, **kwargs): def get_functions(self): if self.source == 'idb': - return idautils.Functions() + return set(idautils.Functions()) elif self.source == 'user': raise NotImplementedError("All user functions are not currently " "supported as source value.") elif self.source == 'single': - func = idaapi.choose_func("Choose function to match with database", - idc.ScreenEA()) - if not func: - return None - return [func.startEA] + return [self.source_single] elif self.source == 'range': - raise NotImplementedError("Range of addresses is not currently " - "supported as source value.") + return set(idautils.Functions(self.source_range[0], + self.source_range[1])) raise ValueError("Invalid source value received from MatchDialog: {}" "".format(self.source)) - def get_functions_count(self): - return 
len(list(self.get_functions())) - - def submit_handler(self, source, target, target_project, target_file, - methods): + def submit_handler(self, source, source_single, source_range, target, + target_project, target_file, methods): self.source = source + self.source_single = source_single + self.source_range = source_range self.target = target self.target_project = target_project if target == 'project' else None self.target_file = target_file if target == 'file' else None self.methods = methods # TODO: actually use target and methods - function_gen = self.get_functions() - if not function_gen: + functions = self.get_functions() + if not functions: return False - self.function_gen = enumerate(function_gen) + self.function_gen = enumerate(functions) self.pbar = QtWidgets.QProgressDialog() self.pbar.setLabelText("Processing IDB... You may continue working,\nbut " "please avoid making any ground-breaking changes.") - self.pbar.setRange(0, self.get_functions_count()) + self.pbar.setRange(0, len(functions)) self.pbar.setValue(0) self.pbar.canceled.connect(self.cancel_upload) self.pbar.accepted.connect(self.accepted_upload) @@ -116,7 +111,19 @@ def cancel_upload(self): def accepted_upload(self): self.cancel_upload() + if self.source == 'idb': + self.source_range = [None, None] + elif self.source == 'single': + self.source_range = [self.source_single, self.source_single] + elif self.source == 'range': + pass + else: + raise NotImplementedError("Unsupported source type encountered in task " + "creation") + params = {'action': 'commit', 'source_file': netnode.bound_file_id, + 'source_start': self.source_range[0], + 'source_end': self.source_range[1], 'target_project': self.target_project, 'target_file': self.target_file, 'source': self.source, 'methods': self.methods} @@ -146,6 +153,8 @@ def perform_task(self): status = r['status'] if status == 'failed': self.pbar.reject() + self.timer.stop() + self.timer = None elif progress_max: self.pbar.setMaximum(progress_max) if 
progress >= progress_max: diff --git a/idaplugin/rematch/dialogs/base.py b/idaplugin/rematch/dialogs/base.py index dd652a8a5..31866bd93 100755 --- a/idaplugin/rematch/dialogs/base.py +++ b/idaplugin/rematch/dialogs/base.py @@ -1,4 +1,7 @@ -from ..idasix import QtWidgets +from ..idasix import QtWidgets, QtCore + +import idaapi +import idc from .. import network @@ -24,6 +27,7 @@ def create_radio_group(self, title, *radios, **kwargs): radiogroup = QtWidgets.QButtonGroup() groupbox = QtWidgets.QGroupBox(title) layout = QtWidgets.QGridLayout() + layout.setColumnStretch(1, 1) checked = kwargs.pop('checked', None) self.radio_groups[radiogroup] = [] @@ -32,9 +36,9 @@ def create_radio_group(self, title, *radios, **kwargs): radio_widget = QtWidgets.QRadioButton(radio_name) radiogroup.addButton(radio_widget, i) - layout.addWidget(radio_widget, i, 0) + layout.addWidget(radio_widget, i, 0, QtCore.Qt.AlignTop) if radio_extra_controls is not None: - layout.addWidget(radio_extra_controls, i, 1) + layout.addWidget(radio_extra_controls, i, 1, QtCore.Qt.AlignTop) # if extra controller comes disabled, make sure it stays that way # and also make the radio box disabled if radio_extra_controls.isEnabled(): @@ -59,7 +63,7 @@ def create_item_select(item, allow_none=True, exclude=None): response = network.query("GET", "collab/{}/".format(item), json=True) combobox = QtWidgets.QComboBox() for idx, obj in enumerate(response): - if exclude and obj['name'] in exclude or obj['id'] in exclude: + if exclude and (obj['name'] in exclude or obj['id'] in exclude): continue text = "{} ({})".format(obj['name'], obj['id']) combobox.insertItem(idx, text, int(obj['id'])) @@ -152,3 +156,72 @@ def get(cls, **kwargs): data = dialog.data() return data, result == QtWidgets.QDialog.Accepted + + +class QFunctionSelect(QtWidgets.QWidget): + changed = QtCore.Signal() + + def __init__(self, text_max_length=30, **kwargs): + super(QFunctionSelect, self).__init__(**kwargs) + + self.text_max = text_max_length + + 
self.label = QtWidgets.QPushButton() + self.label.clicked.connect(self.label_clicked) + self.label.setFlat(True) + self.btn = QtWidgets.QPushButton("...") + self.btn.setMaximumWidth(20) + self.btn.clicked.connect(self.btn_clicked) + + self.set_func(idaapi.get_func(idc.ScreenEA())) + + layout = QtWidgets.QHBoxLayout() + layout.setContentsMargins(0, 0, 0, 0) + layout.addWidget(self.label) + layout.addWidget(self.btn) + layout.setStretch(0, 1) + self.setLayout(layout) + + def set_func(self, func): + self.func = func + text = idc.GetFunctionName(self.func.startEA) + text = text[:self.text_max] + "..." if len(text) > self.text_max else text + self.label.setText(text) + + def label_clicked(self, checked): + del checked + idc.Jump(self.func.startEA) + + def btn_clicked(self, checked): + del checked + f = idaapi.choose_func("Choose function to match with database", + self.func.startEA) + if f: + self.set_func(f) + self.changed.emit() + + +class QFunctionRangeSelect(QtWidgets.QWidget): + def __init__(self, text_max_length=30, **kwargs): + super(QFunctionRangeSelect, self).__init__(**kwargs) + self.start = QFunctionSelect(text_max_length=text_max_length) + self.start.changed.connect(self.selection_changed) + self.end = QFunctionSelect(text_max_length=text_max_length) + self.end.changed.connect(self.selection_changed) + + layout = QtWidgets.QGridLayout() + layout.setContentsMargins(0, 0, 0, 0) + layout.addWidget(QtWidgets.QLabel("From"), 0, 0) + layout.addWidget(QtWidgets.QLabel("To"), 1, 0) + layout.addWidget(self.start, 0, 1) + layout.addWidget(self.end, 1, 1) + + self.setLayout(layout) + + def selection_changed(self): + if self.start.func.startEA < self.end.func.endEA: + return + + start_func = self.start.func + self.start.set_func(self.end.func) + self.end.set_func(start_func) diff --git a/idaplugin/rematch/dialogs/match.py b/idaplugin/rematch/dialogs/match.py index d05dd72ef..9fa287911 100755 --- a/idaplugin/rematch/dialogs/match.py +++ 
b/idaplugin/rematch/dialogs/match.py @@ -8,10 +8,12 @@ class MatchDialog(base.BaseDialog): def __init__(self, **kwargs): super(MatchDialog, self).__init__(title="Match", **kwargs) + self.source_single = base.QFunctionSelect() + self.source_range = base.QFunctionRangeSelect() choices = [("Entire IDB", 'idb', None), ("User functions", 'user', None), - ("Single function", 'single', None), - ("Range", 'range', None)] + ("Single function", 'single', self.source_single), + ("Range", 'range', self.source_range)] self.sourceGrp = self.create_radio_group("Match source", *choices) self.target_project = self.create_item_select('projects', allow_none=False) @@ -49,6 +51,9 @@ def data(self): methods.append('graph') return {'source': self.get_radio_result(self.sourceGrp), + 'source_single': self.source_single.func.startEA, + 'source_range': [self.source_range.start.func.startEA, + self.source_range.end.func.endEA], 'target': self.get_radio_result(self.targetGrp), 'target_project': self.target_project.currentData(), 'target_file': self.target_file.currentData(), diff --git a/server/collab/models.py b/server/collab/models.py index a70bebc44..bbc1999d8 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -121,6 +121,9 @@ class Task(models.Model): action = models.CharField(max_length=16, choices=ACTION_CHOICES) source_file = models.ForeignKey(File, related_name='source_tasks') + # TODO: make sure start > end + source_start = models.PositiveIntegerField(null=True) + source_end = models.PositiveIntegerField(null=True) # TODO: make sure only at least one of target_file/target_project is null target_file = models.ForeignKey(File, null=True) target_project = models.ForeignKey(Project, null=True) diff --git a/server/collab/serializers.py b/server/collab/serializers.py index 541ce0d88..76583883c 100644 --- a/server/collab/serializers.py +++ b/server/collab/serializers.py @@ -35,7 +35,7 @@ class Meta: model = Task fields = ('id', 'task_id', 'created', 'finished', 'owner', 
'status', 'action', 'target_project', 'target_file', 'source_file', - 'progress', 'progress_max') + 'source_start', 'source_end', 'progress', 'progress_max') class InstanceSerializer(serializers.ModelSerializer): diff --git a/server/collab/tasks.py b/server/collab/tasks.py index 028a7c659..e7dcd9437 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -8,25 +8,36 @@ @shared_task def match(): - # recording the task has started - task = Task.objects.filter(task_id=match.request.id) - task.update(status=Task.STATUS_STARTED, progress=0, - progress_max=len(matches.match_list)) - - # get input parameters - task_id, source_file, target_project, target_file = \ - task.values('id', 'source_file_id', 'target_project_id', 'target_file_id') - - base_source_vectors = Vector.objects.filter(file_id=source_file) - base_target_vectors = Vector.objects.exclude(file_id=source_file) - if target_project: - base_target_vectors = base_target_vectors.filter(project_id=target_project) - if target_file: - base_target_vectors = base_target_vectors.filter(file_id=target_file) - - print("Running task {}".format(match.request.id)) - # TODO: order might be important here try: + # recording the task has started + task = Task.objects.filter(task_id=match.request.id) + task.update(status=Task.STATUS_STARTED, progress=0, + progress_max=len(matches.match_list)) + + # get input parameters + task_values = task.values_list('id', 'source_file_id', 'source_start', + 'source_end', 'target_project_id', + 'target_file_id')[0] + print(task_values) + (task_id, source_file, source_start, source_end, target_project, + target_file) = task_values + + source_filter = {'file_id': source_file} + if source_start: + source_filter['instance__offset__gte'] = source_start + if source_end: + source_filter['instance__offset__lte'] = source_end + base_source_vectors = Vector.objects.filter(**source_filter) + + target_filter = {'file_id__not': source_file} + if target_project: + target_filter = {'project_id': 
target_project} + elif target_file: + target_filter = {'file_id': target_file} + base_target_vectors = Vector.objects.filter(**target_filter) + + print("Running task {}".format(match.request.id)) + # TODO: order might be important here for match_type in matches.match_list: print(match_type) start = now() From b8ef12f5801467aaea8ccb0ae2ca803cc73da671 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 27 Oct 2016 22:06:30 -0700 Subject: [PATCH 65/68] self review fixes part one Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 4 +--- server/collab/matches.py | 6 ------ server/collab/models.py | 12 +----------- server/collab/serializers.py | 4 ++-- 4 files changed, 4 insertions(+), 22 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 10941223c..3f9870c57 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -18,7 +18,6 @@ def __init__(self, *args, **kwargs): self.pbar = None self.timer = None self.task_id = None - self.pbar = None self.instance_set = [] self.source = None @@ -54,7 +53,6 @@ def submit_handler(self, source, source_single, source_range, target, self.target_file = target_file if target == 'file' else None self.methods = methods - # TODO: actually use target and methods functions = self.get_functions() if not functions: return False @@ -121,7 +119,7 @@ def accepted_upload(self): raise NotImplementedError("Unsupported source type encountered in task " "creation") - params = {'action': 'commit', 'source_file': netnode.bound_file_id, + params = {'source_file': netnode.bound_file_id, 'source_start': self.source_range[0], 'source_end': self.source_range[1], 'target_project': self.target_project, diff --git a/server/collab/matches.py b/server/collab/matches.py index 2ab2a24f4..3acf494c5 100644 --- a/server/collab/matches.py +++ b/server/collab/matches.py @@ -19,7 +19,6 @@ class HashMatch(Match): @classmethod def match(cls, source, target): # 
unique_values = set(source_dict.values()) - print(source.count(), target.count()) flipped_rest = collections.defaultdict(list) # TODO: could be optimized by enumerating all identity matchs together target_values = target.values_list('id', 'instance_id', 'data').iterator() @@ -59,15 +58,10 @@ def match(source, target): target_data = dictvect.transform([json.loads(d) for d in target_data]) source_matrix = skl.preprocessing.normalize(source_data, axis=1, norm='l1') target_matrix = skl.preprocessing.normalize(target_data, axis=1, norm='l1') - print(type(source_matrix)) - print(source_matrix.shape) - print(type(target_matrix)) - print(target_matrix.shape) for source_i in range(source_matrix.shape[0]): source_vector = source_matrix[source_i].toarray() source_id = source_ids[source_i] source_instance_id = source_instance_ids[source_i] - print(source_i) for target_i in range(target_matrix.shape[0]): target_vector = target_matrix[target_i].toarray() diff --git a/server/collab/models.py b/server/collab/models.py index bbc1999d8..244312136 100644 --- a/server/collab/models.py +++ b/server/collab/models.py @@ -99,16 +99,7 @@ class Task(models.Model): (STATUS_STARTED, "Started"), (STATUS_DONE, "Done!"), (STATUS_FAILED, "Failure")) - ACTION_COMMIT = "commit" - ACTION_MATCH = "match" - ACTION_UPDATE = "update" - ACTION_CLUSTER = "cluster" - ACTION_CHOICES = ((ACTION_COMMIT, "Commit"), - (ACTION_MATCH, "Match"), - (ACTION_UPDATE, "Update"), - (ACTION_CLUSTER, "Cluster")) - - # TODO: to uuid field + task_id = models.UUIDField(db_index=True, unique=True, editable=False) # store matched objects @@ -118,7 +109,6 @@ class Task(models.Model): owner = models.ForeignKey(User, db_index=True) status = models.CharField(default=STATUS_PENDING, max_length=16, choices=STATUS_CHOICES) - action = models.CharField(max_length=16, choices=ACTION_CHOICES) source_file = models.ForeignKey(File, related_name='source_tasks') # TODO: make sure start > end diff --git a/server/collab/serializers.py 
b/server/collab/serializers.py index 76583883c..d8ec85eca 100644 --- a/server/collab/serializers.py +++ b/server/collab/serializers.py @@ -34,8 +34,8 @@ class TaskSerializer(serializers.ModelSerializer): class Meta: model = Task fields = ('id', 'task_id', 'created', 'finished', 'owner', 'status', - 'action', 'target_project', 'target_file', 'source_file', - 'source_start', 'source_end', 'progress', 'progress_max') + 'target_project', 'target_file', 'source_file', 'source_start', + 'source_end', 'progress', 'progress_max') class InstanceSerializer(serializers.ModelSerializer): From c31224f4a7525d41b2119987e7d5109529c9d616 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Thu, 27 Oct 2016 22:30:20 -0700 Subject: [PATCH 66/68] Prevent editing target and source info in existing task Signed-off-by: Nir Izraeli --- server/collab/serializers.py | 8 ++++++++ server/collab/views.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/server/collab/serializers.py b/server/collab/serializers.py index d8ec85eca..afc1f8058 100644 --- a/server/collab/serializers.py +++ b/server/collab/serializers.py @@ -38,6 +38,14 @@ class Meta: 'source_end', 'progress', 'progress_max') +class TaskEditSerializer(TaskSerializer): + target_project = serializers.ReadOnlyField() + target_file = serializers.ReadOnlyField() + source_file = serializers.ReadOnlyField() + source_start = serializers.ReadOnlyField() + source_end = serializers.ReadOnlyField() + + class InstanceSerializer(serializers.ModelSerializer): class NestedVectorSerializer(serializers.ModelSerializer): class Meta: diff --git a/server/collab/views.py b/server/collab/views.py index 255ce00e1..5575c9b38 100644 --- a/server/collab/views.py +++ b/server/collab/views.py @@ -1,8 +1,8 @@ from rest_framework import viewsets, permissions, mixins from collab.models import Project, File, Task, Instance, Vector from collab.serializers import (ProjectSerializer, FileSerializer, - TaskSerializer, InstanceSerializer, - 
VectorSerializer) + TaskSerializer, TaskEditSerializer, + InstanceSerializer, VectorSerializer) from collab.permissions import IsOwnerOrReadOnly from collab import tasks @@ -48,6 +48,12 @@ def perform_create(self, serializer): result = tasks.match.delay() serializer.save(owner=self.request.user, task_id=result.id) + def get_serializer_class(self): + serializer_class = self.serializer_class + if self.request.method in ('PATCH', 'PUT'): + serializer_class = TaskEditSerializer + return serializer_class + class InstanceViewSet(ViewSetManyAllowedMixin, ViewSetOwnerMixin, viewsets.ModelViewSet): From 6cb948c6ca5c625646e3e1ccedc62c812f328d46 Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Sat, 29 Oct 2016 17:35:32 -0700 Subject: [PATCH 67/68] Replace create_item_select and create_radio_select with QItemSelect and QRadioGroup Signed-off-by: Nir Izraeli --- idaplugin/rematch/actions/match.py | 18 ++-- idaplugin/rematch/dialogs/base.py | 135 ++++++++++++++++----------- idaplugin/rematch/dialogs/match.py | 16 ++-- idaplugin/rematch/dialogs/project.py | 2 +- server/collab/tasks.py | 2 +- 5 files changed, 103 insertions(+), 70 deletions(-) diff --git a/idaplugin/rematch/actions/match.py b/idaplugin/rematch/actions/match.py index 3f9870c57..575809911 100755 --- a/idaplugin/rematch/actions/match.py +++ b/idaplugin/rematch/actions/match.py @@ -64,7 +64,8 @@ def submit_handler(self, source, source_single, source_range, target, self.pbar.setRange(0, len(functions)) self.pbar.setValue(0) self.pbar.canceled.connect(self.cancel_upload) - self.pbar.accepted.connect(self.accepted_upload) + self.pbar.rejected.connect(self.reject_upload) + self.pbar.accepted.connect(self.accept_upload) self.timer = QtCore.QTimer() self.timer.timeout.connect(self.perform_upload) @@ -106,7 +107,10 @@ def cancel_upload(self): self.timer = None self.pbar = None - def accepted_upload(self): + def reject_upload(self): + self.cancel_upload() + + def accept_upload(self): self.cancel_upload() if self.source == 
'idb': @@ -134,7 +138,8 @@ def accepted_upload(self): self.pbar.setRange(0, int(r['progress_max']) if r['progress_max'] else 0) self.pbar.setValue(int(r['progress'])) self.pbar.canceled.connect(self.cancel_task) - self.pbar.accepted.connect(self.accepted_task) + self.pbar.rejected.connect(self.reject_task) + self.pbar.accepted.connect(self.accept_task) self.pbar.show() self.timer = QtCore.QTimer() @@ -151,8 +156,6 @@ def perform_task(self): status = r['status'] if status == 'failed': self.pbar.reject() - self.timer.stop() - self.timer = None elif progress_max: self.pbar.setMaximum(progress_max) if progress >= progress_max: @@ -168,5 +171,8 @@ def cancel_task(self): self.timer = None self.pbar = None - def accepted_task(self): + def reject_task(self): + self.cancel_task() + + def accept_task(self): self.cancel_task() diff --git a/idaplugin/rematch/dialogs/base.py b/idaplugin/rematch/dialogs/base.py index 31866bd93..ec10f0b8a 100755 --- a/idaplugin/rematch/dialogs/base.py +++ b/idaplugin/rematch/dialogs/base.py @@ -18,65 +18,10 @@ def __init__(self, title="", reject_handler=None, submit_handler=None, self.exception_handler = exception_handler self.response = None self.statusLbl = None - self.radio_groups = {} self.base_layout = QtWidgets.QVBoxLayout() self.setLayout(self.base_layout) - def create_radio_group(self, title, *radios, **kwargs): - radiogroup = QtWidgets.QButtonGroup() - groupbox = QtWidgets.QGroupBox(title) - layout = QtWidgets.QGridLayout() - layout.setColumnStretch(1, 1) - checked = kwargs.pop('checked', None) - - self.radio_groups[radiogroup] = [] - for i, radio in enumerate(radios): - radio_name, radio_id, radio_extra_controls = radio - radio_widget = QtWidgets.QRadioButton(radio_name) - - radiogroup.addButton(radio_widget, i) - layout.addWidget(radio_widget, i, 0, QtCore.Qt.AlignTop) - if radio_extra_controls is not None: - layout.addWidget(radio_extra_controls, i, 1, QtCore.Qt.AlignTop) - # if extra controller comes disabled, make sure it stays that 
way - # and also make the radio box disabled - if radio_extra_controls.isEnabled(): - radio_widget.toggled.connect(radio_extra_controls.setEnabled) - radio_extra_controls.setEnabled(False) - else: - radio_widget.setEnabled(False) - - # if checked is supplied, set correct radio as checked - # else set first radio as checked` - if (checked is None and i == 0) or checked == radio_id: - radio_widget.setChecked(True) - - self.radio_groups[radiogroup].append(radio_id) - groupbox.setLayout(layout) - self.base_layout.addWidget(groupbox) - - return radiogroup - - @staticmethod - def create_item_select(item, allow_none=True, exclude=None): - response = network.query("GET", "collab/{}/".format(item), json=True) - combobox = QtWidgets.QComboBox() - for idx, obj in enumerate(response): - if exclude and (obj['name'] in exclude or obj['id'] in exclude): - continue - text = "{} ({})".format(obj['name'], obj['id']) - combobox.insertItem(idx, text, int(obj['id'])) - if allow_none: - combobox.insertItem(0, "None", None) - elif combobox.count() == 0: - combobox.setEnabled(False) - return combobox - - def get_radio_result(self, group): - group_ids = self.radio_groups[group] - return group_ids[group.checkedId()] - def bottom_layout(self, ok_text="&Ok", cencel_text="&Cancel"): self.statusLbl = QtWidgets.QLabel() self.base_layout.addWidget(self.statusLbl) @@ -158,6 +103,86 @@ def get(cls, **kwargs): return data, result == QtWidgets.QDialog.Accepted +class QItemSelect(QtWidgets.QComboBox): + def __init__(self, item, name_field='name', id_field='id', allow_none=True, + exclude=None, default_id=None): + super(QItemSelect, self).__init__() + self.item = item + self.name_field = name_field + self.id_field = id_field + self.allow_none = allow_none + self.exclude = exclude + self.default_id = default_id + + self.refresh() + + def refresh(self): + response = network.query("GET", "collab/{}/".format(self.item), json=True) + + # copy currently selected or get default + if self.currentIndex() == -1: 
+ selected_id = self.default_id + else: + self.currentData() + + # only clear after response is received + self.clear() + for idx, obj in enumerate(response): + item_name = obj[self.name_field] + item_id = obj[self.id_field] + if self.exclude and (item_name in self.exclude or + item_id in self.exclude): + continue + + text = "{} ({})".format(item_name, item_id) + self.insertItem(idx, text, int(item_id)) + if item_id == selected_id: + self.setCurrentIndex(idx) + + if self.allow_none: + self.insertItem(0, "None", None) + elif self.count() == 0: + self.setEnabled(False) + + +class QRadioGroup(QtWidgets.QGroupBox): + def __init__(self, title, *radios, **kwargs): + checked = kwargs.pop('checked', None) + + super(QRadioGroup, self).__init__(title, **kwargs) + + self.radiogroup = QtWidgets.QButtonGroup() + layout = QtWidgets.QGridLayout() + layout.setColumnStretch(1, 1) + + for i, radio in enumerate(radios): + radio_name, radio_id, radio_extra_controls = radio + radio_widget = QtWidgets.QRadioButton(radio_name) + radio_widget.setObjectName(radio_id) + + self.radiogroup.addButton(radio_widget, i) + layout.addWidget(radio_widget, i, 0, QtCore.Qt.AlignTop) + if radio_extra_controls is not None: + layout.addWidget(radio_extra_controls, i, 1, QtCore.Qt.AlignTop) + # if extra controller comes disabled, make sure it stays that way + # and also make the radio box disabled + if radio_extra_controls.isEnabled(): + radio_widget.toggled.connect(radio_extra_controls.setEnabled) + radio_extra_controls.setEnabled(False) + else: + radio_widget.setEnabled(False) + + # if checked is supplied, set correct radio as checked + # else set first radio as checked` + if (checked is None and i == 0) or checked == radio_id: + radio_widget.setChecked(True) + + self.setLayout(layout) + + def get_result(self): + return self.radiogroup.checkedButton().objectName() + + class QFunctionSelect(QtWidgets.QWidget): changed = QtCore.Signal() diff --git a/idaplugin/rematch/dialogs/match.py 
b/idaplugin/rematch/dialogs/match.py index 9fa287911..21016b85c 100755 --- a/idaplugin/rematch/dialogs/match.py +++ b/idaplugin/rematch/dialogs/match.py @@ -14,15 +14,17 @@ def __init__(self, **kwargs): ("User functions", 'user', None), ("Single function", 'single', self.source_single), ("Range", 'range', self.source_range)] - self.sourceGrp = self.create_radio_group("Match source", *choices) + self.sourceGrp = base.QRadioGroup("Match source", *choices) + self.base_layout.addWidget(self.sourceGrp) - self.target_project = self.create_item_select('projects', allow_none=False) - self.target_file = self.create_item_select('files', allow_none=False, - exclude=[netnode.bound_file_id]) + self.target_project = base.QItemSelect('projects', allow_none=False) + self.target_file = base.QItemSelect('files', allow_none=False, + exclude=[netnode.bound_file_id]) choices = [("Entire DB", 'db', None), ("Project", 'project', self.target_project), ("Another file", 'file', self.target_file)] - self.targetGrp = self.create_radio_group("Match target", *choices) + self.targetGrp = base.QRadioGroup("Match target", *choices) + self.base_layout.addWidget(self.targetGrp) self.identity = QtWidgets.QCheckBox("Identify matches") self.fuzzy = QtWidgets.QCheckBox("Fuzzy matches") @@ -50,11 +52,11 @@ def data(self): if self.graph.isChecked(): methods.append('graph') - return {'source': self.get_radio_result(self.sourceGrp), + return {'source': self.sourceGrp.get_result(), 'source_single': self.source_single.func.startEA, 'source_range': [self.source_range.start.func.startEA, self.source_range.end.func.endEA], - 'target': self.get_radio_result(self.targetGrp), + 'target': self.targetGrp.get_result(), 'target_project': self.target_project.currentData(), 'target_file': self.target_file.currentData(), 'methods': methods} diff --git a/idaplugin/rematch/dialogs/project.py b/idaplugin/rematch/dialogs/project.py index 67ba070d5..123fd77fa 100755 --- a/idaplugin/rematch/dialogs/project.py +++ 
b/idaplugin/rematch/dialogs/project.py @@ -53,7 +53,7 @@ def __init__(self, **kwargs): gridLyt.addWidget(QtWidgets.QLabel("Description:"), 2, 0) gridLyt.addWidget(QtWidgets.QLabel("MD5 hash:"), 3, 0) - self.projectCbb = self.create_item_select('projects') + self.projectCbb = base.QItemSelect('projects') gridLyt.addWidget(self.projectCbb, 0, 1) self.nameTxt = QtWidgets.QLineEdit() diff --git a/server/collab/tasks.py b/server/collab/tasks.py index e7dcd9437..7bb455780 100644 --- a/server/collab/tasks.py +++ b/server/collab/tasks.py @@ -31,7 +31,7 @@ def match(): target_filter = {'file_id__not': source_file} if target_project: - target_filter = {'project_id': target_project} + target_filter = {'file__project_id': target_project} elif target_file: target_filter = {'file_id': target_file} base_target_vectors = Vector.objects.filter(**target_filter) From 41ccc63af88f961539dc36889ed37cf8be2ab87c Mon Sep 17 00:00:00 2001 From: Nir Izraeli Date: Tue, 1 Nov 2016 21:43:12 -0700 Subject: [PATCH 68/68] Split matches.py to entire directory of match type definitions Signed-off-by: Nir Izraeli --- server/collab/matches.py | 86 -------------------------- server/collab/matches/__init__.py | 11 ++++ server/collab/matches/assembly_hash.py | 6 ++ server/collab/matches/hash_match.py | 20 ++++++ server/collab/matches/hist_match.py | 39 ++++++++++++ server/collab/matches/match.py | 5 ++ server/collab/matches/mnemonic_hash.py | 6 ++ server/collab/matches/mnemonic_hist.py | 6 ++ server/collab/matches/opcode_hist.py | 6 ++ 9 files changed, 99 insertions(+), 86 deletions(-) delete mode 100644 server/collab/matches.py create mode 100644 server/collab/matches/__init__.py create mode 100644 server/collab/matches/assembly_hash.py create mode 100644 server/collab/matches/hash_match.py create mode 100644 server/collab/matches/hist_match.py create mode 100644 server/collab/matches/match.py create mode 100644 server/collab/matches/mnemonic_hash.py create mode 100644 
server/collab/matches/mnemonic_hist.py create mode 100644 server/collab/matches/opcode_hist.py diff --git a/server/collab/matches.py b/server/collab/matches.py deleted file mode 100644 index 3acf494c5..000000000 --- a/server/collab/matches.py +++ /dev/null @@ -1,86 +0,0 @@ -import collections -import itertools -import json - -import numpy as np -import sklearn as skl -import sklearn.preprocessing # noqa flake8 importing as a different name -import sklearn.feature_extraction # noqa flake8 importing as a different name - - -class Match: - @classmethod - def match(cls, source, target): - raise NotImplementedError("Method match for vector type {} not " - "implemented".format(cls)) - - -class HashMatch(Match): - @classmethod - def match(cls, source, target): - # unique_values = set(source_dict.values()) - flipped_rest = collections.defaultdict(list) - # TODO: could be optimized by enumerating all identity matchs together - target_values = target.values_list('id', 'instance_id', 'data').iterator() - for target_id, target_instance_id, target_data in target_values: - # TODO: could be optimized by uncommenting next line as most 'target' - # values won't be present in 'source' list - # if v in unique_values: - flipped_rest[target_data].append((target_id, target_instance_id)) - source_values = source.values_list('id', 'instance_id', 'data').iterator() - for source_id, source_instance_id, source_data in source_values: - for target_id, target_instance_id in flipped_rest.get(source_data, ()): - yield source_id, source_instance_id, target_id, target_instance_id, 100 - - -class AssemblyHashMatch(HashMatch): - vector_type = 'assembly_hash' - match_type = 'assembly_hash' - - -class MnemonicHashMatch(HashMatch): - vector_type = 'mnemonic_hash' - match_type = 'mnemonic_hash' - - -class HistogramMatch(Match): - @staticmethod - def match(source, target): - source_values = itertools.izip(*source.values_list('id', 'instance_id', - 'data')) - target_values = 
itertools.izip(*target.values_list('id', 'instance_id', - 'data')) - - source_ids, source_instance_ids, source_data = source_values - target_ids, target_instance_ids, target_data = target_values - dictvect = skl.feature_extraction.DictVectorizer() - source_data = dictvect.fit_transform([json.loads(d) for d in source_data]) - target_data = dictvect.transform([json.loads(d) for d in target_data]) - source_matrix = skl.preprocessing.normalize(source_data, axis=1, norm='l1') - target_matrix = skl.preprocessing.normalize(target_data, axis=1, norm='l1') - for source_i in range(source_matrix.shape[0]): - source_vector = source_matrix[source_i].toarray() - source_id = source_ids[source_i] - source_instance_id = source_instance_ids[source_i] - - for target_i in range(target_matrix.shape[0]): - target_vector = target_matrix[target_i].toarray() - target_id = target_ids[target_i] - target_instance_id = target_instance_ids[target_i] - - score = np.linalg.norm(source_vector - target_vector) - yield (source_id, source_instance_id, target_id, target_instance_id, - score) - - -class MnemonicHistogramMatch(HistogramMatch): - vector_type = 'mnemonic_hist' - match_type = 'mnemonic_hist' - - -class OpcodeHistogramMatch(HistogramMatch): - vector_type = 'opcode_histogram' - match_type = 'opcode_histogram' - -match_list = [AssemblyHashMatch, MnemonicHashMatch, MnemonicHistogramMatch, - OpcodeHistogramMatch] diff --git a/server/collab/matches/__init__.py b/server/collab/matches/__init__.py new file mode 100644 index 000000000..2ca03222d --- /dev/null +++ b/server/collab/matches/__init__.py @@ -0,0 +1,11 @@ +from assembly_hash import AssemblyHashMatch +from mnemonic_hash import MnemonicHashMatch +from mnemonic_hist import MnemonicHistogramMatch +from opcode_hist import OpcodeHistogramMatch + + +match_list = [AssemblyHashMatch, MnemonicHashMatch, MnemonicHistogramMatch, + OpcodeHistogramMatch] + +__all__ = ['AssemblyHashMatch', 'MnemonicHashMatch', 'MnemonicHistogramMatch', + 
'OpcodeHistogramMatch', 'match_list'] diff --git a/server/collab/matches/assembly_hash.py b/server/collab/matches/assembly_hash.py new file mode 100644 index 000000000..c8b788163 --- /dev/null +++ b/server/collab/matches/assembly_hash.py @@ -0,0 +1,6 @@ +import hash_match + + +class AssemblyHashMatch(hash_match.HashMatch): + vector_type = 'assembly_hash' + match_type = 'assembly_hash' diff --git a/server/collab/matches/hash_match.py b/server/collab/matches/hash_match.py new file mode 100644 index 000000000..f90f7398a --- /dev/null +++ b/server/collab/matches/hash_match.py @@ -0,0 +1,20 @@ +import collections +import match + + +class HashMatch(match.Match): + @classmethod + def match(cls, source, target): + # unique_values = set(source_dict.values()) + flipped_rest = collections.defaultdict(list) + # TODO: could be optimized by enumerating all identity matchs together + target_values = target.values_list('id', 'instance_id', 'data').iterator() + for target_id, target_instance_id, target_data in target_values: + # TODO: could be optimized by uncommenting next line as most 'target' + # values won't be present in 'source' list + # if v in unique_values: + flipped_rest[target_data].append((target_id, target_instance_id)) + source_values = source.values_list('id', 'instance_id', 'data').iterator() + for source_id, source_instance_id, source_data in source_values: + for target_id, target_instance_id in flipped_rest.get(source_data, ()): + yield source_id, source_instance_id, target_id, target_instance_id, 100 diff --git a/server/collab/matches/hist_match.py b/server/collab/matches/hist_match.py new file mode 100644 index 000000000..ed6002047 --- /dev/null +++ b/server/collab/matches/hist_match.py @@ -0,0 +1,39 @@ +import itertools +import json + +import numpy as np +import sklearn as skl +import sklearn.preprocessing # noqa flake8 importing as a different name +import sklearn.feature_extraction # noqa flake8 importing as a different name + +import match + + +class 
HistogramMatch(match.Match): + @staticmethod + def match(source, target): + source_values = itertools.izip(*source.values_list('id', 'instance_id', + 'data')) + target_values = itertools.izip(*target.values_list('id', 'instance_id', + 'data')) + + source_ids, source_instance_ids, source_data = source_values + target_ids, target_instance_ids, target_data = target_values + dictvect = skl.feature_extraction.DictVectorizer() + source_data = dictvect.fit_transform([json.loads(d) for d in source_data]) + target_data = dictvect.transform([json.loads(d) for d in target_data]) + source_matrix = skl.preprocessing.normalize(source_data, axis=1, norm='l1') + target_matrix = skl.preprocessing.normalize(target_data, axis=1, norm='l1') + for source_i in range(source_matrix.shape[0]): + source_vector = source_matrix[source_i].toarray() + source_id = source_ids[source_i] + source_instance_id = source_instance_ids[source_i] + + for target_i in range(target_matrix.shape[0]): + target_vector = target_matrix[target_i].toarray() + target_id = target_ids[target_i] + target_instance_id = target_instance_ids[target_i] + + score = np.linalg.norm(source_vector - target_vector) + yield (source_id, source_instance_id, target_id, target_instance_id, + score) diff --git a/server/collab/matches/match.py b/server/collab/matches/match.py new file mode 100644 index 000000000..64a500e11 --- /dev/null +++ b/server/collab/matches/match.py @@ -0,0 +1,5 @@ +class Match: + @classmethod + def match(cls, source, target): + raise NotImplementedError("Method match for vector type {} not " + "implemented".format(cls)) diff --git a/server/collab/matches/mnemonic_hash.py b/server/collab/matches/mnemonic_hash.py new file mode 100644 index 000000000..df1b1c8c0 --- /dev/null +++ b/server/collab/matches/mnemonic_hash.py @@ -0,0 +1,6 @@ +import hash_match + + +class MnemonicHashMatch(hash_match.HashMatch): + vector_type = 'mnemonic_hash' + match_type = 'mnemonic_hash' diff --git 
a/server/collab/matches/mnemonic_hist.py b/server/collab/matches/mnemonic_hist.py new file mode 100644 index 000000000..e7a39b6b9 --- /dev/null +++ b/server/collab/matches/mnemonic_hist.py @@ -0,0 +1,6 @@ +import hist_match + + +class MnemonicHistogramMatch(hist_match.HistogramMatch): + vector_type = 'mnemonic_hist' + match_type = 'mnemonic_hist' diff --git a/server/collab/matches/opcode_hist.py b/server/collab/matches/opcode_hist.py new file mode 100644 index 000000000..5a180bfa8 --- /dev/null +++ b/server/collab/matches/opcode_hist.py @@ -0,0 +1,6 @@ +import hist_match + + +class OpcodeHistogramMatch(hist_match.HistogramMatch): + vector_type = 'opcode_histogram' + match_type = 'opcode_histogram'