From 44b5098a5a1c0e7f3156d789d1c6b7964e06628a Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 16 Oct 2020 16:10:32 +0700 Subject: [PATCH 01/34] Implement files data-access object --- db/files_dao.py | 243 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 db/files_dao.py diff --git a/db/files_dao.py b/db/files_dao.py new file mode 100644 index 00000000..3f51dfb5 --- /dev/null +++ b/db/files_dao.py @@ -0,0 +1,243 @@ +from dataclasses import dataclass, field +from datetime import datetime +from typing import List + +from sqlalchemy import or_, func, literal_column +from sqlalchemy.orm import aliased + +from db.schema import Files, Matches, VideoMetadata, Exif + + +class FileMatchFilter: + """Enum for file match filtering criteria.""" + ALL = "all" + RELATED = "related" + DUPLICATES = "duplicates" + UNIQUE = "unique" + + values = {ALL, RELATED, DUPLICATES, UNIQUE} + + +class FileSort: + """Enum for result ordering.""" + DATE = "date" + LENGTH = "length" + RELATED = "related" + DUPLICATES = "duplicates" + + values = {DATE, LENGTH, RELATED, DUPLICATES} + + +@dataclass +class ListFilesRequest: + """Parameters for list-files query.""" + + limit: int = 20 + offset: int = 0 + path_query: str = None + extensions: List[str] = field(default_factory=list) + exif: bool = None + audio: bool = None + min_length: int = None + max_length: int = None + date_from: datetime = None + date_to: datetime = None + preload: list = field(default_factory=list) + sort: str = None + match_filter: str = FileMatchFilter.ALL + related_distance: float = 0.4 + duplicate_distance: float = 0.1 + + +@dataclass +class Counts: + """Count of files by matches.""" + total: int + related: int + duplicates: int + unique: int + + +@dataclass +class ListFilesResults: + """Results of list-files query.""" + items: List[Files] + counts: Counts + + +class FilesDAO: + """Data-access object for files.""" + + # Format in which Dates are currently stored in 
exif table. + _EXIF_DATE_FORMAT = " UTC %Y-%m-%d 00" + + # Label for related entities count (matches, scenes, etc.) + _LABEL_COUNT = "hit_count" + _countable_match = aliased(Matches) + + @staticmethod + def list_files(req: ListFilesRequest, session) -> ListFilesResults: + """Query multiple files.""" + # Count files + query = session.query(Files) + query = FilesDAO._filter_by_file_attributes(req, query) + counts = FilesDAO.counts(query, req.related_distance, req.duplicate_distance) + + # Select files + sortable_attributes = FilesDAO._sortable_attributes(req) + query = session.query(Files, *sortable_attributes) + query = FilesDAO._filter_by_file_attributes(req, query) + query = FilesDAO._filter_by_matches(req, query) + query = FilesDAO._sort_items(req, query) + + # Retrieve slice + query = query.offset(req.offset).limit(req.limit) + items = query.all() + + # Get files from result set if there are additional attributes. + if len(sortable_attributes) > 0: + items = [item[0] for item in items] + + return ListFilesResults(items=items, counts=counts) + + @staticmethod + def counts(query, related_distance, duplicate_distance): + """Count queried files by matches.""" + total = query.count() + duplicates = query.filter(FilesDAO.has_matches(duplicate_distance)).count() + related = query.filter(FilesDAO.has_matches(related_distance)).count() + unique = total - related + return Counts( + total=total, + related=related, + duplicates=duplicates, + unique=unique) + + @staticmethod + def has_matches(threshold): + """Create a filter criteria to check if there is a match + with distance lesser or equal to the given threshold.""" + return or_(Files.source_matches.any(Matches.distance <= threshold), + Files.target_matches.any(Matches.distance <= threshold)) + + @staticmethod + def file_matches(file_id, session): + """Query for all file matches.""" + return session.query(Matches).filter(or_( + Matches.query_video_file_id == file_id, + Matches.match_video_file_id == file_id + )) + + 
@staticmethod + def _sortable_attributes(req: ListFilesRequest): + """Get additional sortable attributes.""" + values = [] + if req.sort == FileSort.RELATED or req.sort == FileSort.DUPLICATES: + match_count = func.count(FilesDAO._countable_match.id).label(FilesDAO._LABEL_COUNT) + values.append(match_count) + return values + + @staticmethod + def _sort_items(req: ListFilesRequest, query): + """Apply ordering.""" + if req.sort == FileSort.RELATED or req.sort == FileSort.DUPLICATES: + match = FilesDAO._countable_match + threshold = req.related_distance if req.sort == FileSort.RELATED else req.duplicate_distance + query = query.outerjoin(FilesDAO._countable_match, + ((match.query_video_file_id == Files.id) | + (match.match_video_file_id == Files.id)) & (match.distance < threshold)) + return query.group_by(Files.id).order_by(literal_column(FilesDAO._LABEL_COUNT).desc(), Files.id.asc()) + elif req.sort == FileSort.LENGTH: + meta = aliased(VideoMetadata) + return query.outerjoin(meta).order_by(meta.video_length.desc(), Files.id.asc()) + elif req.sort == FileSort.DATE: + exif = aliased(Exif) + return query.outerjoin(exif).order_by(exif.General_Encoded_Date.desc(), Files.id.asc()) + return query + + @staticmethod + def _filter_path(req: ListFilesRequest, query): + """Filter by file name.""" + if req.path_query: + return query.filter(Files.file_path.ilike(f"%{req.path_query}%")) + return query + + @staticmethod + def _filter_extensions(req: ListFilesRequest, query): + """Filter by file extension.""" + if req.extensions: + conditions = (Files.file_path.ilike(f"%.{ext}") for ext in req.extensions) + return query.filter(or_(*conditions)) + return query + + @staticmethod + def _filter_exif(req: ListFilesRequest, query): + """Filter by EXIF data presence.""" + if req.exif is not None: + has_exif = Files.exif.has() + if req.exif: + return query.filter(has_exif) + else: + return query.filter(~has_exif) + return query + + @staticmethod + def _filter_audio(req: ListFilesRequest, 
query): + """Filter by audio presence.""" + if req.audio is not None: + has_audio = Files.exif.has(Exif.Audio_Duration > 0) + if req.audio: + return query.filter(has_audio) + else: + return query.filter(~has_audio) + return query + + @staticmethod + def _filter_date(req: ListFilesRequest, query): + """Filter by creation date.""" + if req.date_from is not None: + query = query.filter( + Files.exif.has(Exif.General_Encoded_Date >= req.date_from.strftime(FilesDAO._EXIF_DATE_FORMAT))) + + if req.date_to is not None: + query = query.filter( + Files.exif.has(Exif.General_Encoded_Date <= req.date_to.strftime(FilesDAO._EXIF_DATE_FORMAT))) + + return query + + @staticmethod + def _filter_length(req: ListFilesRequest, query): + """Filter by length.""" + if req.min_length is not None or req.max_length is not None: + query = query.join(Files.meta) + + if req.min_length is not None: + query = query.filter(VideoMetadata.video_length >= req.min_length) + + if req.max_length is not None: + query = query.filter(VideoMetadata.video_length <= req.max_length) + + return query + + @staticmethod + def _filter_by_matches(req: ListFilesRequest, query): + """Filter by presence of similar files.""" + if req.match_filter == FileMatchFilter.DUPLICATES: + return query.filter(FilesDAO.has_matches(req.duplicate_distance)) + elif req.match_filter == FileMatchFilter.RELATED: + return query.filter(FilesDAO.has_matches(req.related_distance)) + elif req.match_filter == FileMatchFilter.UNIQUE: + return query.filter(~FilesDAO.has_matches(req.related_distance)) + # else MatchCategory.ALL + return query + + @staticmethod + def _filter_by_file_attributes(req: ListFilesRequest, query): + """Apply filters related to the properties of video file itself.""" + query = FilesDAO._filter_path(req, query) + query = FilesDAO._filter_extensions(req, query) + query = FilesDAO._filter_exif(req, query) + query = FilesDAO._filter_audio(req, query) + query = FilesDAO._filter_date(req, query) + query = 
FilesDAO._filter_length(req, query) + return query From a527fe1dcbb51e149c2a201e090d73ebb8df3c2d Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 16 Oct 2020 16:10:51 +0700 Subject: [PATCH 02/34] Migrate to files-dao --- server/server/api/files.py | 290 +++++------------------------------ server/server/api/helpers.py | 46 +++--- server/server/api/matches.py | 9 +- 3 files changed, 73 insertions(+), 272 deletions(-) diff --git a/server/server/api/files.py b/server/server/api/files.py index 46e096e2..66584a65 100644 --- a/server/server/api/files.py +++ b/server/server/api/files.py @@ -1,283 +1,73 @@ -from datetime import datetime from http import HTTPStatus from os.path import dirname, basename -from typing import List, Dict -from dataclasses import dataclass, field from flask import jsonify, request, abort, send_from_directory -from sqlalchemy import or_, func, literal_column -from sqlalchemy.orm import aliased -from db.schema import Files, Exif, VideoMetadata, Matches, Signature, Scene +from db.files_dao import ListFilesRequest, FileMatchFilter, FileSort, FilesDAO +from db.schema import Files from thumbnail.ffmpeg import extract_frame_tmp from .blueprint import api -from .helpers import file_matches, parse_boolean, parse_positive_int, parse_date, parse_enum, get_config, has_matches, \ - get_thumbnails, resolve_video_file_path +from .helpers import parse_boolean, parse_positive_int, parse_date, parse_enum, get_thumbnails, \ + resolve_video_file_path, Fields, parse_fields, parse_seq from ..model import database, Transform - -class MatchCategory: - """Enum for match distance criteria.""" - ALL = "all" - RELATED = "related" - DUPLICATES = "duplicates" - UNIQUE = "unique" - - values = {ALL, RELATED, DUPLICATES, UNIQUE} - - -class Sort: - """Enum for result ordering.""" - DATE = "date" - LENGTH = "length" - RELATED = "related" - DUPLICATES = "duplicates" - - values = {DATE, LENGTH, RELATED, DUPLICATES} - - -@dataclass -class Counts: - """Count of files by 
matches.""" - total: int = 0 - related: int = 0 - duplicates: int = 0 - unique: int = 0 - - @staticmethod - def get(query, related_distance, duplicate_distance): - """Count queried files by matches.""" - total = query.count() - duplicates = query.filter(has_matches(duplicate_distance)).count() - related = query.filter(has_matches(related_distance)).count() - unique = total - related - return Counts(total=total, related=related, duplicates=duplicates, unique=unique) - - -@dataclass -class Arguments: - """Parsed request arguments.""" - - # Request parameters: - limit: int = 20 - offset: int = 0 - path_query: str = None - extensions: List[str] = field(default_factory=list) - exif: bool = None - audio: bool = None - min_length: int = None - max_length: int = None - date_from: datetime = None - date_to: datetime = None - include: Dict[str, bool] = field(default_factory=dict) - match_category: str = MatchCategory.ALL - sort: str = None - - # Query options for additional fields that could be included on demand - _ADDITIONAL_FIELDS = { - "meta": (Files.meta, VideoMetadata), - "signature": (Files.signature, Signature), - "exif": (Files.exif, Exif), - "scenes": (Files.scenes, Scene), - } - - # Format in which Dates are currently stored in exif table. - _EXIF_DATE_FORMAT = " UTC %Y-%m-%d 00" - - # Label for related entities count (matches, scenes, etc.) 
- _LABEL_COUNT = "hit_count" - _countable_match = aliased(Matches) - - @staticmethod - def parse_extensions(): - """File extensions to search""" - extensions = request.args.get('extensions', '', type=str) - extensions = [ext.strip() for ext in extensions.split(',')] - extensions = [ext for ext in extensions if len(ext) > 0] - return extensions - - @staticmethod - def parse_include(): - """Additional fields to include""" - fields = request.args.get('include', '', type=str) - fields = set(field.strip() for field in fields.split(',')) - include = {field: (field in fields) for field in Arguments._ADDITIONAL_FIELDS} - return include - - @staticmethod - def parse(): - """Parse and validate request arguments.""" - result = Arguments() - result.limit = parse_positive_int(request.args, 'limit', 20) - result.offset = parse_positive_int(request.args, 'offset', 0) - result.path_query = request.args.get('path', '', type=str).strip() - result.exif = parse_boolean(request.args, 'exif') - result.audio = parse_boolean(request.args, 'audio') - result.min_length = parse_positive_int(request.args, 'min_length') - result.max_length = parse_positive_int(request.args, 'max_length') - result.include = Arguments.parse_include() - result.extensions = Arguments.parse_extensions() - result.date_from = parse_date(request.args, "date_from") - result.date_to = parse_date(request.args, "date_to") - result.match_category = parse_enum(request.args, "matches", - values=MatchCategory.values, - default=MatchCategory.ALL) - result.sort = parse_enum(request.args, "sort", values=Sort.values, default=None) - return result - - def sortable_attributes(self): - """Get additional sortable attributes.""" - values = [] - if self.sort == Sort.RELATED or self.sort == Sort.DUPLICATES: - match_count = func.count(self._countable_match.id).label(self._LABEL_COUNT) - values.append(match_count) - return values - - def sort_items(self, query, related_distance, duplicate_distance): - """Apply ordering.""" - if self.sort 
== Sort.RELATED or self.sort == Sort.DUPLICATES: - match = self._countable_match - threshold = related_distance if self.sort == Sort.RELATED else duplicate_distance - query = query.outerjoin(self._countable_match, - ((match.query_video_file_id == Files.id) | - (match.match_video_file_id == Files.id)) & (match.distance < threshold)) - return query.group_by(Files.id).order_by(literal_column(self._LABEL_COUNT).desc(), Files.id.asc()) - elif self.sort == Sort.LENGTH: - meta = aliased(VideoMetadata) - return query.outerjoin(meta).order_by(meta.video_length.desc(), Files.id.asc()) - elif self.sort == Sort.DATE: - exif = aliased(Exif) - return query.outerjoin(exif).order_by(exif.General_Encoded_Date.desc(), Files.id.asc()) - return query - - def filter_path(self, query): - """Filter by file name.""" - if self.path_query: - return query.filter(Files.file_path.ilike(f"%{self.path_query}%")) - return query - - def filter_extensions(self, query): - """Filter by file extension.""" - if self.extensions: - conditions = (Files.file_path.ilike(f"%.{ext}") for ext in self.extensions) - return query.filter(or_(*conditions)) - return query - - def filter_exif(self, query): - """Filter by EXIF data presence.""" - if self.exif is not None: - has_exif = Files.exif.has() - if self.exif: - return query.filter(has_exif) - else: - return query.filter(~has_exif) - return query - - def filter_audio(self, query): - """Filter by audio presence.""" - if self.audio is not None: - has_audio = Files.exif.has(Exif.Audio_Duration > 0) - if self.audio: - return query.filter(has_audio) - else: - return query.filter(~has_audio) - return query - - def filter_date(self, query): - """Filter by creation date.""" - if self.date_from is not None: - query = query.filter( - Files.exif.has(Exif.General_Encoded_Date >= self.date_from.strftime(self._EXIF_DATE_FORMAT))) - - if self.date_to is not None: - query = query.filter( - Files.exif.has(Exif.General_Encoded_Date <= 
self.date_to.strftime(self._EXIF_DATE_FORMAT))) - - return query - - def filter_length(self, query): - """Filter by length.""" - if self.min_length is not None or self.max_length is not None: - query = query.join(Files.meta) - - if self.min_length is not None: - query = query.filter(VideoMetadata.video_length >= self.min_length) - - if self.max_length is not None: - query = query.filter(VideoMetadata.video_length <= self.max_length) - - return query - - def filter_by_matches(self, query, related_distance, duplicate_distance): - """Filter by presence of similar files.""" - if self.match_category == MatchCategory.DUPLICATES: - return query.filter(has_matches(duplicate_distance)) - elif self.match_category == MatchCategory.RELATED: - return query.filter(has_matches(related_distance)) - elif self.match_category == MatchCategory.UNIQUE: - return query.filter(~has_matches(related_distance)) - # else MatchCategory.ALL - return query - - def filter_by_file_attributes(self, query): - """Apply filters related to the properties of video file itself.""" - query = self.filter_path(query) - query = self.filter_extensions(query) - query = self.filter_exif(query) - query = self.filter_audio(query) - query = self.filter_date(query) - query = self.filter_length(query) - return query +# Optional file fields to be loaded +FILE_FIELDS = Fields(Files.exif, Files.meta, Files.signature, Files.scenes) + + +def parse(): + """Parse and validate request arguments.""" + result = ListFilesRequest() + result.limit = parse_positive_int(request.args, 'limit', 20) + result.offset = parse_positive_int(request.args, 'offset', 0) + result.path_query = request.args.get('path', '', type=str).strip() + result.exif = parse_boolean(request.args, 'exif') + result.audio = parse_boolean(request.args, 'audio') + result.min_length = parse_positive_int(request.args, 'min_length') + result.max_length = parse_positive_int(request.args, 'max_length') + result.preload = parse_fields(request.args, "include", 
FILE_FIELDS) + result.extensions = parse_seq(request.args, "extensions") + result.date_from = parse_date(request.args, "date_from") + result.date_to = parse_date(request.args, "date_to") + result.match_category = parse_enum(request.args, "matches", + values=FileMatchFilter.values, + default=FileMatchFilter.ALL) + result.sort = parse_enum(request.args, "sort", values=FileSort.values, default=None) + return result @api.route('/files/', methods=['GET']) def list_files(): - args = Arguments.parse() - - # Count files - config = get_config() - query = database.session.query(Files) - query = args.filter_by_file_attributes(query) - counts = Counts.get(query, config.related_distance, config.duplicate_distance) - - # Select files - sortable_attributes = args.sortable_attributes() - query = database.session.query(Files, *sortable_attributes) - query = args.filter_by_file_attributes(query) - query = args.filter_by_matches(query, config.related_distance, config.duplicate_distance) - query = args.sort_items(query, config.related_distance, config.duplicate_distance) - - # Retrieve slice - query = query.offset(args.offset).limit(args.limit) - items = query.all() + req = parse() - # Get files from result set if there are additional attributes. 
- if len(sortable_attributes) > 0: - items = [item[0] for item in items] + results = FilesDAO.list_files(req, database.session) + include_flags = {field.key: True for field in req.preload} return jsonify({ - 'items': [Transform.file_dict(item, **args.include) for item in items], - 'total': counts.total, - 'duplicates': counts.duplicates, - 'related': counts.related, - 'unique': counts.unique + 'items': [Transform.file_dict(item, **include_flags) for item in results.items], + 'total': results.counts.total, + 'duplicates': results.counts.duplicates, + 'related': results.counts.related, + 'unique': results.counts.unique }) @api.route('/files/', methods=['GET']) def get_file(file_id): - include = Arguments.parse_include() + extra_fields = parse_fields(request.args, "include", FILE_FIELDS) # Fetch file from database query = database.session.query(Files) + query = FILE_FIELDS.preload(query, extra_fields) file = query.filter(Files.id == file_id).first() # Handle file not found if file is None: abort(HTTPStatus.NOT_FOUND.value, f"File id not found: {file_id}") - data = Transform.file_dict(file, **include) - data["matches_count"] = file_matches(file_id).count() + include_flags = {field.key: True for field in extra_fields} + data = Transform.file_dict(file, **include_flags) + data["matches_count"] = FilesDAO.file_matches(file_id, database.session).count() return jsonify(data) diff --git a/server/server/api/helpers.py b/server/server/api/helpers.py index 692f2185..428a3ed1 100644 --- a/server/server/api/helpers.py +++ b/server/server/api/helpers.py @@ -5,21 +5,10 @@ from http import HTTPStatus from flask import current_app, abort -from sqlalchemy import or_ from sqlalchemy.orm import joinedload -from db.schema import Matches, Files from thumbnail.cache import ThumbnailCache from ..config import Config -from ..model import database - - -def file_matches(file_id): - """Query for all file matches.""" - return database.session.query(Matches).filter(or_( - 
Matches.query_video_file_id == file_id, - Matches.match_video_file_id == file_id - )) def get_config() -> Config: @@ -55,6 +44,14 @@ def parse_boolean(args, name): abort(HTTPStatus.BAD_REQUEST.value, f"{name} has invalid format (expected {_TRUTHY} or {_FALSY})") +def parse_seq(args, name): + """Parse sequence of comma-separated values.""" + seq = args.get(name, '', type=str) + items = [item.strip() for item in seq.split(',')] + items = [item for item in items if len(item) > 0] + return items + + def parse_positive_int(args, name, default=None): """Parse positive integer parameter.""" value = args.get(name, default=default, type=int) @@ -63,6 +60,14 @@ def parse_positive_int(args, name, default=None): return value +def parse_positive_float(args, name, default=None): + """Parse positive float parameter.""" + value = args.get(name, default=default, type=float) + if value is not default and value < 0: + abort(HTTPStatus.BAD_REQUEST.value, f"{name} cannot be negative") + return value + + DATE_PATTERN = re.compile(r'^\d{4}-\d{2}-\d{2}$') @@ -100,11 +105,10 @@ def parse_enum_seq(args, name, values, default=None): return result -def has_matches(threshold): - """Create a filter criteria to check if there is a match - with distance lesser or equal to the given threshold.""" - return or_(Files.source_matches.any(Matches.distance <= threshold), - Files.target_matches.any(Matches.distance <= threshold)) +def parse_fields(args, name, fields): + """Parse requested fields list.""" + field_names = parse_enum_seq(args, name, values=fields.names, default=()) + return {fields.get(name) for name in field_names} class Fields: @@ -124,10 +128,14 @@ def names(self): """Set of field names.""" return {field.key for field in self.fields} - def preload(self, query, names, *path): + def get(self, name): + """Get field by name.""" + return self._index[name] + + @staticmethod + def preload(query, fields, *path): """Enable eager loading for enumerated fields.""" - for name in names: - field = 
self._index[name] + for field in fields: full_path = path + (field,) query = query.options(joinedload(*full_path)) return query diff --git a/server/server/api/matches.py b/server/server/api/matches.py index 03985c37..dcebf854 100644 --- a/server/server/api/matches.py +++ b/server/server/api/matches.py @@ -1,10 +1,11 @@ from flask import jsonify, request from sqlalchemy.orm import joinedload +from db.files_dao import FilesDAO from db.schema import Matches, Files from .blueprint import api -from .helpers import file_matches, parse_positive_int, Fields, parse_enum_seq -from ..model import Transform +from .helpers import parse_positive_int, Fields, parse_enum_seq +from ..model import Transform, database # Optional file fields FILE_FIELDS = Fields(Files.exif, Files.signature, Files.meta, Files.scenes) @@ -14,9 +15,11 @@ def list_file_matches(file_id): limit = parse_positive_int(request.args, 'limit', 20) offset = parse_positive_int(request.args, 'offset', 0) + # hops = parse_positive_int(request.args, "hops", 1) + # min_distance = parse_positive_float(request.args, '') include_fields = parse_enum_seq(request.args, 'include', values=FILE_FIELDS.names, default=()) - query = file_matches(file_id).options( + query = FilesDAO.file_matches(file_id, database.session).options( joinedload(Matches.match_video_file), joinedload(Matches.query_video_file) ) From 51ee0271ef4bccfa95ebb703c807544dcee70dd3 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 16 Oct 2020 16:11:08 +0700 Subject: [PATCH 03/34] Update server tests --- server/tests/server/test_api.py | 38 ++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/server/tests/server/test_api.py b/server/tests/server/test_api.py index 5e94cfaa..615e2092 100644 --- a/server/tests/server/test_api.py +++ b/server/tests/server/test_api.py @@ -8,8 +8,8 @@ import pytest +from db.files_dao import FileMatchFilter, FileSort from db.schema import Files, Base, Exif, VideoMetadata, Scene, Matches 
-from server.api.files import MatchCategory, Sort from server.config import Config from server.main import create_application from server.model import database @@ -456,19 +456,19 @@ def test_list_files_filter_matches(client, app, config): assert_files(resp, expected=all_files, total=len(all_files), related=len(related), duplicates=len(duplicates)) # Get explicitly - resp = client.get(f"/api/v1/files/?matches={MatchCategory.ALL}&limit={len(all_files)}") + resp = client.get(f"/api/v1/files/?matches={FileMatchFilter.ALL}&limit={len(all_files)}") assert_files(resp, expected=all_files, total=len(all_files), related=len(related), duplicates=len(duplicates)) # Get unique - resp = client.get(f"/api/v1/files/?matches={MatchCategory.UNIQUE}&limit={len(all_files)}") + resp = client.get(f"/api/v1/files/?matches={FileMatchFilter.UNIQUE}&limit={len(all_files)}") assert_files(resp, expected=unique, total=len(all_files), related=len(related), duplicates=len(duplicates)) # Get related - resp = client.get(f"/api/v1/files/?matches={MatchCategory.RELATED}&limit={len(all_files)}") + resp = client.get(f"/api/v1/files/?matches={FileMatchFilter.RELATED}&limit={len(all_files)}") assert_files(resp, expected=related, total=len(all_files), related=len(related), duplicates=len(duplicates)) # Get duplicates - resp = client.get(f"/api/v1/files/?matches={MatchCategory.DUPLICATES}&limit={len(all_files)}") + resp = client.get(f"/api/v1/files/?matches={FileMatchFilter.DUPLICATES}&limit={len(all_files)}") assert_files(resp, expected=duplicates, total=len(all_files), related=len(related), duplicates=len(duplicates)) @@ -485,11 +485,11 @@ def test_list_files_sort_date(client, app): all_date_sorted = new + old # Get all - resp = client.get(f"/api/v1/files/?limit={len(all_date_sorted)}&sort={Sort.DATE}") + resp = client.get(f"/api/v1/files/?limit={len(all_date_sorted)}&sort={FileSort.DATE}") assert_files(resp, expected=all_date_sorted, total=len(all_date_sorted)) # Get old - resp = 
client.get(f"/api/v1/files/?limit={len(all_date_sorted)}&offset={len(new)}&sort={Sort.DATE}") + resp = client.get(f"/api/v1/files/?limit={len(all_date_sorted)}&offset={len(new)}&sort={FileSort.DATE}") assert_files(resp, expected=old, total=len(all_date_sorted)) @@ -505,11 +505,11 @@ def test_list_files_sort_length(client, app): all_length_sorted = long + short # Get all - resp = client.get(f"/api/v1/files/?limit={len(all_length_sorted)}&sort={Sort.LENGTH}") + resp = client.get(f"/api/v1/files/?limit={len(all_length_sorted)}&sort={FileSort.LENGTH}") assert_files(resp, expected=all_length_sorted, total=len(all_length_sorted)) # Get short - resp = client.get(f"/api/v1/files/?limit={len(all_length_sorted)}&offset={len(long)}&sort={Sort.LENGTH}") + resp = client.get(f"/api/v1/files/?limit={len(all_length_sorted)}&offset={len(long)}&sort={FileSort.LENGTH}") assert_files(resp, expected=short, total=len(all_length_sorted)) @@ -527,13 +527,13 @@ def test_list_files_sort_duplicates(client, app, config): all_dup_sorted = [a] + sorted([b, c], key=attr("id")) + sorted([d, e] + unique, key=attr("id")) # Get all - resp = client.get(f"/api/v1/files/?limit={len(all_dup_sorted)}&sort={Sort.DUPLICATES}") + resp = client.get(f"/api/v1/files/?limit={len(all_dup_sorted)}&sort={FileSort.DUPLICATES}") assert_files(resp, expected=all_dup_sorted, total=len(all_dup_sorted)) # Get slice offset = int(len(all_dup_sorted) / 2) limit = int(len(all_dup_sorted) / 4) - resp = client.get(f"/api/v1/files/?limit={limit}&offset={offset}&sort={Sort.DUPLICATES}") + resp = client.get(f"/api/v1/files/?limit={limit}&offset={offset}&sort={FileSort.DUPLICATES}") assert_files(resp, expected=all_dup_sorted[offset:offset + limit], total=len(all_dup_sorted)) @@ -552,13 +552,13 @@ def test_list_files_sort_related(client, app, config): all_rel_sorted = sorted([a, c, d], key=attr("id")) + sorted([b, e], key=attr("id")) + sorted(unique, key=attr("id")) # Get all - resp = 
client.get(f"/api/v1/files/?limit={len(all_rel_sorted)}&sort={Sort.RELATED}") + resp = client.get(f"/api/v1/files/?limit={len(all_rel_sorted)}&sort={FileSort.RELATED}") assert_files(resp, expected=all_rel_sorted, total=len(all_rel_sorted)) # Get slice offset = int(len(all_rel_sorted) / 2) limit = int(len(all_rel_sorted) / 4) - resp = client.get(f"/api/v1/files/?limit={limit}&offset={offset}&sort={Sort.RELATED}") + resp = client.get(f"/api/v1/files/?limit={limit}&offset={offset}&sort={FileSort.RELATED}") assert_files(resp, expected=all_rel_sorted[offset:offset + limit], total=len(all_rel_sorted)) @@ -586,8 +586,8 @@ def test_list_files_mixed_example(client, app, config): resp = client.get( f"/api/v1/files/?" f"min_length={length_large}&" - f"matches={MatchCategory.RELATED}&" - f"sort={Sort.DUPLICATES}&" + f"matches={FileMatchFilter.RELATED}&" + f"sort={FileSort.DUPLICATES}&" f"limit={len(all_files)}") expected = sorted([b, c], key=attr("id")) + [e] assert_files(resp, expected, total=4, related=len(expected)) @@ -596,8 +596,8 @@ def test_list_files_mixed_example(client, app, config): resp = client.get( f"/api/v1/files/?" f"max_length={length_small}&" - f"matches={MatchCategory.RELATED}&" - f"sort={Sort.DUPLICATES}&" + f"matches={FileMatchFilter.RELATED}&" + f"sort={FileSort.DUPLICATES}&" f"limit={len(all_files)}") expected = [a, d] assert_files(resp, expected, total=len(all_files) - 4, related=len(expected)) @@ -606,8 +606,8 @@ def test_list_files_mixed_example(client, app, config): resp = client.get( f"/api/v1/files/?" 
f"min_length={length_large}&" - f"matches={MatchCategory.UNIQUE}&" - f"sort={Sort.RELATED}&" + f"matches={FileMatchFilter.UNIQUE}&" + f"sort={FileSort.RELATED}&" f"limit={len(all_files)}") expected = [f] assert_files(resp, expected, total=4) From 59a8eee8ec98d7bfbd989cd19a286c625e9524b8 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 16 Oct 2020 22:48:55 +0700 Subject: [PATCH 04/34] Fix regression --- server/server/api/files.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/server/server/api/files.py b/server/server/api/files.py index 66584a65..b2b21bb2 100644 --- a/server/server/api/files.py +++ b/server/server/api/files.py @@ -8,15 +8,16 @@ from thumbnail.ffmpeg import extract_frame_tmp from .blueprint import api from .helpers import parse_boolean, parse_positive_int, parse_date, parse_enum, get_thumbnails, \ - resolve_video_file_path, Fields, parse_fields, parse_seq + resolve_video_file_path, Fields, parse_fields, parse_seq, get_config from ..model import database, Transform # Optional file fields to be loaded FILE_FIELDS = Fields(Files.exif, Files.meta, Files.signature, Files.scenes) -def parse(): +def parse_params(): """Parse and validate request arguments.""" + config = get_config() result = ListFilesRequest() result.limit = parse_positive_int(request.args, 'limit', 20) result.offset = parse_positive_int(request.args, 'offset', 0) @@ -29,16 +30,18 @@ def parse(): result.extensions = parse_seq(request.args, "extensions") result.date_from = parse_date(request.args, "date_from") result.date_to = parse_date(request.args, "date_to") - result.match_category = parse_enum(request.args, "matches", - values=FileMatchFilter.values, - default=FileMatchFilter.ALL) + result.match_filter = parse_enum(request.args, "matches", + values=FileMatchFilter.values, + default=FileMatchFilter.ALL) + result.related_distance = config.related_distance + result.duplicate_distance = config.duplicate_distance result.sort = 
parse_enum(request.args, "sort", values=FileSort.values, default=None) return result @api.route('/files/', methods=['GET']) def list_files(): - req = parse() + req = parse_params() results = FilesDAO.list_files(req, database.session) include_flags = {field.key: True for field in req.preload} From b664957ee598ea3dd88c06376da7efc0d2c26170 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 16 Oct 2020 22:50:34 +0700 Subject: [PATCH 05/34] Make module naming more ideomatic --- db/access/__init__.py | 0 db/{files_dao.py => access/files.py} | 0 server/server/api/files.py | 2 +- server/server/api/matches.py | 2 +- server/tests/server/test_api.py | 4 ++-- 5 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 db/access/__init__.py rename db/{files_dao.py => access/files.py} (100%) diff --git a/db/access/__init__.py b/db/access/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/db/files_dao.py b/db/access/files.py similarity index 100% rename from db/files_dao.py rename to db/access/files.py diff --git a/server/server/api/files.py b/server/server/api/files.py index b2b21bb2..86df0e6d 100644 --- a/server/server/api/files.py +++ b/server/server/api/files.py @@ -3,7 +3,7 @@ from flask import jsonify, request, abort, send_from_directory -from db.files_dao import ListFilesRequest, FileMatchFilter, FileSort, FilesDAO +from db.access.files import ListFilesRequest, FileMatchFilter, FileSort, FilesDAO from db.schema import Files from thumbnail.ffmpeg import extract_frame_tmp from .blueprint import api diff --git a/server/server/api/matches.py b/server/server/api/matches.py index dcebf854..2aab3717 100644 --- a/server/server/api/matches.py +++ b/server/server/api/matches.py @@ -1,7 +1,7 @@ from flask import jsonify, request from sqlalchemy.orm import joinedload -from db.files_dao import FilesDAO +from db.access.files import FilesDAO from db.schema import Matches, Files from .blueprint import api from .helpers import parse_positive_int, Fields, 
parse_enum_seq diff --git a/server/tests/server/test_api.py b/server/tests/server/test_api.py index 615e2092..178f8872 100644 --- a/server/tests/server/test_api.py +++ b/server/tests/server/test_api.py @@ -8,7 +8,7 @@ import pytest -from db.files_dao import FileMatchFilter, FileSort +from db.access.files import FileMatchFilter, FileSort from db.schema import Files, Base, Exif, VideoMetadata, Scene, Matches from server.config import Config from server.main import create_application @@ -455,7 +455,7 @@ def test_list_files_filter_matches(client, app, config): resp = client.get(f"/api/v1/files/?limit={len(all_files)}") assert_files(resp, expected=all_files, total=len(all_files), related=len(related), duplicates=len(duplicates)) - # Get explicitly + # Get all explicitly resp = client.get(f"/api/v1/files/?matches={FileMatchFilter.ALL}&limit={len(all_files)}") assert_files(resp, expected=all_files, total=len(all_files), related=len(related), duplicates=len(duplicates)) From 673bcec0a49126609e2a6896fa14ba13a5fa3c59 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 16 Oct 2020 23:17:06 +0700 Subject: [PATCH 06/34] Resolve linting issues --- server/server/api/__init__.py | 8 +++++++- server/server/api/scenes.py | 4 +--- server/server/api/videos.py | 3 +-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/server/server/api/__init__.py b/server/server/api/__init__.py index 8a2da8b5..154a1461 100644 --- a/server/server/api/__init__.py +++ b/server/server/api/__init__.py @@ -1,2 +1,8 @@ +# Disable flake8 issue F401 as we need these imports to configure api +# but not going to re-export them from the __init__ +from . import scenes, matches, files, errors, videos # noqa: F401 from .blueprint import api -from . 
import scenes, matches, files, errors, videos + +# Explicitly reexport api +# See discussion in https://bugs.launchpad.net/pyflakes/+bug/1178905 +__all__ = ["api"] diff --git a/server/server/api/scenes.py b/server/server/api/scenes.py index eca3468e..40a9bbf4 100644 --- a/server/server/api/scenes.py +++ b/server/server/api/scenes.py @@ -1,7 +1,5 @@ -# import sys -# sys.path.append('..') +from flask import jsonify, request, url_for -from flask import jsonify, request, g, url_for, current_app from db.schema import Scene from .blueprint import api diff --git a/server/server/api/videos.py b/server/server/api/videos.py index 84612fcf..7c449272 100644 --- a/server/server/api/videos.py +++ b/server/server/api/videos.py @@ -5,13 +5,12 @@ from db.schema import Files from .blueprint import api -from .helpers import get_config, resolve_video_file_path +from .helpers import resolve_video_file_path from ..model import database @api.route('/files//watch') def watch_video(file_id): - config = get_config() file = database.session.query(Files).filter(Files.id == file_id).first() # Handle file not found From 3dcc7804d7ac9acc73825245c1c25ce70698d15a Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Tue, 20 Oct 2020 10:23:57 +0700 Subject: [PATCH 07/34] Support expunging by session scope --- db/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/db/__init__.py b/db/__init__.py index 40a1003c..e03a5fa1 100644 --- a/db/__init__.py +++ b/db/__init__.py @@ -36,13 +36,16 @@ def drop_tables(self): self.base.metadata.drop_all(bind=self.engine) @contextmanager - def session_scope(self): + def session_scope(self, expunge=False): """Provide a transactional scope.""" session = self.session() try: yield session + if expunge: + session.flush() + session.expunge_all() session.commit() - except: + except Exception: session.rollback() raise finally: From 8ef8a47b031fa283ac0ba09db45bfd990f31fe4d Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Tue, 20 Oct 2020 
10:24:35 +0700 Subject: [PATCH 08/34] Draft matches extraction --- db/access/matches.py | 110 +++++++++++++++++++++ server/tests/db/access/test_matches_dao.py | 108 ++++++++++++++++++++ 2 files changed, 218 insertions(+) create mode 100644 db/access/matches.py create mode 100644 server/tests/db/access/test_matches_dao.py diff --git a/db/access/matches.py b/db/access/matches.py new file mode 100644 index 00000000..95506834 --- /dev/null +++ b/db/access/matches.py @@ -0,0 +1,110 @@ +import itertools +from dataclasses import dataclass, field +from typing import List + +from sqlalchemy import Column +from sqlalchemy.orm import joinedload + +from db.schema import Files, Matches + + +def _chunks(iterable, size=100): + """Split iterable into equal-sized chunks.""" + iterator = iter(iterable) + chunk = list(itertools.islice(iterator, size)) + while chunk: + yield chunk + chunk = list(itertools.islice(iterator, size)) + + +@dataclass +class FileMatchesRequest: + """List single file's matches request.""" + file: Files + limit: int = 20 + offset: int = 0 + max_distance: float = 1.0 + min_distance: float = 0.0 + hops: int = 1 + preload: List[Column] = field(default_factory=list) + + +@dataclass +class FileMatchesResult: + """List single file's matches results.""" + files: List[Files] + matches: List[Matches] + + +class MatchesDAO: + """Data-access object for file matches.""" + + @staticmethod + def list_file_matches(req: FileMatchesRequest, session) -> FileMatchesResult: + """List single file's matches.""" + files = [] + matches = [] + seen_matches = set() + # ids of files that was loaded during previous + # steps or will be loaded during the current step + seen = {req.file.id} + # ids of files that will be loaded during the current step + current_step = {req.file.id} + # 'step' variable is always equal to the minimal distance (number of arrows) + # from the source file to the files that will be loaded during the current step + for step in range(req.hops + 1): + # ids of files 
that will be loaded during the next step + next_step = set() + more_steps = step < req.hops + + # Perform current step in equal-sized chunks + for chunk in _chunks(current_step): + query = session.query(Files).options( + joinedload(Files.source_matches), + joinedload(Files.target_matches)) + query = MatchesDAO._preload_file_attrs(query, req.preload) + items = query.filter(Files.id.in_(chunk)).all() + for file in items: + files.append(file) + if more_steps: + MatchesDAO._populate_next_step(file, seen, next_step) + seen.update(next_step) + current_step = next_step + matches = MatchesDAO._extract_matches(files, file_ids=seen) + return FileMatchesResult(files=files, matches=matches) + + @staticmethod + def _populate_next_step(file, seen, next_step): + """Add not-seen files to the next step.""" + for match in file.source_matches: + matched_file = match.match_video_file + if matched_file.id not in seen: + next_step.add(matched_file.id) + for match in file.target_matches: + matched_file = match.query_video_file + if matched_file.id not in seen: + next_step.add(matched_file.id) + + @staticmethod + def _preload_file_attrs(query, preload): + """Preload requested optional file attributes.""" + for relation in preload: + query = query.options(joinedload(relation)) + return query + + @staticmethod + def _extract_matches(files, file_ids): + """Build matches list.""" + matches = [] + for file in files: + for match in file.target_matches: + if match.match_video_file_id in file_ids and match.query_video_file_id in file_ids: + matches.append(match) + for match in file.source_matches: + if match.match_video_file_id in file_ids and match.query_video_file_id in file_ids: + matches.append(match) + return matches + + @staticmethod + def _apply_match_filters(query, req): + """Apply filters by match attributes.""" diff --git a/server/tests/db/access/test_matches_dao.py b/server/tests/db/access/test_matches_dao.py new file mode 100644 index 00000000..3bda78e9 --- /dev/null +++ 
b/server/tests/db/access/test_matches_dao.py @@ -0,0 +1,108 @@ +import itertools +from uuid import uuid4 as uuid + +import pytest + +from db import Database +from db.access.matches import MatchesDAO, FileMatchesRequest, FileMatchesResult +from db.schema import Files, Exif, VideoMetadata, Scene, Matches + + +def make_file(prefix="", length=42, ext="flv", scenes=((0, 1), (1, 2))): + """Create unique file.""" + path = f"{prefix}some/path/{uuid()}.{ext}" + sha256 = f"hash-of-{path}" + return Files(file_path=path, sha256=sha256, + exif=Exif(General_FileExtension=ext, ), + meta=VideoMetadata(video_length=length), + scenes=[Scene(start_time=start, duration=duration) for start, duration in scenes]) + + +def make_files(count, prefix="", length=42, ext="flv", scenes=((0, 1), (1, 2))): + """Create a collection of unique files.""" + return [ + make_file(prefix=prefix, length=length, ext=ext, scenes=scenes) for _ in range(count) + ] + + +def link(source, target, distance=0.5): + """Create a match between files.""" + return Matches(query_video_file=source, match_video_file=target, distance=distance) + + +@pytest.fixture +def database(): + """Create test database.""" + in_memory_database = Database.in_memory(echo=False) + in_memory_database.create_tables() + return in_memory_database + + +def pop(queue, max_count): + """Pop multiple items from queue.""" + result = [] + for _ in range(max_count): + if len(queue) == 0: + return result + result.append(queue.pop()) + return result + + +def chunks(iterable, size=100): + """Split iterable into equal-sized chunks.""" + iterator = iter(iterable) + chunk = list(itertools.islice(iterator, size)) + while chunk: + yield chunk + chunk = list(itertools.islice(iterator, size)) + + +def assert_file_set(resp: FileMatchesResult, expected): + """Check result file set.""" + expected = {file.id for file in expected} + actual = {file.id for file in resp.files} + assert actual == expected + + +def test_list_file_matches_multiple_paths(database: 
Database): + with database.session_scope(expunge=True) as session: + # Create files + source = make_file() + path_a = make_files(4) + path_b = make_files(4) + session.add(source) + session.add_all(path_a) + session.add_all(path_b) + + # Link files + a1, a2, a3, a4 = path_a + b1, b2, b3, b4 = path_b + session.add_all([ + link(source, a1), link(a2, a1), link(a2, a3), link(a4, a3), + link(b1, source), link(b1, b2), link(b2, b3), link(b4, b3), + ]) + + with database.session_scope() as session: + req = FileMatchesRequest(file=source, hops=0) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source]) + + with database.session_scope() as session: + req = FileMatchesRequest(file=source, hops=1) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source, a1, b1]) + + with database.session_scope() as session: + req = FileMatchesRequest(file=source, hops=2) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source, a1, a2, b1, b2]) + + with database.session_scope() as session: + req = FileMatchesRequest(file=source, hops=3) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source, a1, a2, a3, b1, b2, b3]) + + with database.session_scope(expunge=True) as session: + req = FileMatchesRequest(file=source, hops=4) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source, a1, a2, a3, a4, b1, b2, b3, b4]) From cf817ad3b51863716ccbc510a6da4b1110d403ee Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Tue, 20 Oct 2020 10:55:25 +0700 Subject: [PATCH 09/34] Filter matches by distance --- db/access/matches.py | 24 ++++++++++---- server/tests/db/access/test_matches_dao.py | 38 ++++++++++++++++++++++ 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/db/access/matches.py b/db/access/matches.py index 95506834..c301d5eb 100644 --- a/db/access/matches.py +++ b/db/access/matches.py @@ -3,7 +3,7 @@ from 
typing import List from sqlalchemy import Column -from sqlalchemy.orm import joinedload +from sqlalchemy.orm import joinedload, aliased, contains_eager from db.schema import Files, Matches @@ -43,8 +43,6 @@ class MatchesDAO: def list_file_matches(req: FileMatchesRequest, session) -> FileMatchesResult: """List single file's matches.""" files = [] - matches = [] - seen_matches = set() # ids of files that was loaded during previous # steps or will be loaded during the current step seen = {req.file.id} @@ -59,9 +57,8 @@ def list_file_matches(req: FileMatchesRequest, session) -> FileMatchesResult: # Perform current step in equal-sized chunks for chunk in _chunks(current_step): - query = session.query(Files).options( - joinedload(Files.source_matches), - joinedload(Files.target_matches)) + query = session.query(Files) + query = MatchesDAO._join_matches(query, req) query = MatchesDAO._preload_file_attrs(query, req.preload) items = query.filter(Files.id.in_(chunk)).all() for file in items: @@ -106,5 +103,18 @@ def _extract_matches(files, file_ids): return matches @staticmethod - def _apply_match_filters(query, req): + def _join_matches(query, req): """Apply filters by match attributes.""" + outgoing = aliased(Matches) + incoming = aliased(Matches) + return query. \ + outerjoin(outgoing, + (outgoing.query_video_file_id == Files.id) & + (outgoing.distance >= req.min_distance) & + (outgoing.distance <= req.max_distance)). \ + outerjoin(incoming, + (incoming.match_video_file_id == Files.id) & + (incoming.distance >= req.min_distance) & + (incoming.distance <= req.max_distance)). \ + options(contains_eager(Files.source_matches, alias=outgoing)). 
\ + options(contains_eager(Files.target_matches, alias=incoming)) diff --git a/server/tests/db/access/test_matches_dao.py b/server/tests/db/access/test_matches_dao.py index 3bda78e9..1d58740c 100644 --- a/server/tests/db/access/test_matches_dao.py +++ b/server/tests/db/access/test_matches_dao.py @@ -106,3 +106,41 @@ def test_list_file_matches_multiple_paths(database: Database): req = FileMatchesRequest(file=source, hops=4) resp = MatchesDAO.list_file_matches(req, session) assert_file_set(resp, expected=[source, a1, a2, a3, a4, b1, b2, b3, b4]) + + +def test_list_file_matches_filter_distance(database: Database): + short, long = 0.1, 0.9 + with database.session_scope(expunge=True) as session: + # Create files + source = make_file() + path_a = make_files(4) + path_b = make_files(4) + session.add(source) + session.add_all(path_a) + session.add_all(path_b) + + # Link files + a1, a2, a3, a4 = path_a + b1, b2, b3, b4 = path_b + session.add_all([ + link(source, a1, short), link(a2, a1, short), link(a2, a3, short), link(a4, a3, short), + link(b1, source, long), link(b1, b2, long), link(b2, b3, long), link(b4, b3, long), + ]) + + # Query all + with database.session_scope(expunge=True) as session: + req = FileMatchesRequest(file=source, hops=4) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source, a1, a2, a3, a4, b1, b2, b3, b4]) + + # Query short + with database.session_scope(expunge=True) as session: + req = FileMatchesRequest(file=source, hops=4, max_distance=short) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source, a1, a2, a3, a4]) + + # Query long + with database.session_scope(expunge=True) as session: + req = FileMatchesRequest(file=source, hops=4, min_distance=long) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source, b1, b2, b3, b4]) From 3f6c8cc9ac5cf6626bcc1d33519d88613fcbc696 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Tue, 20 Oct 2020 
12:02:47 +0700 Subject: [PATCH 10/34] Test query matches with cycles --- server/tests/db/access/test_matches_dao.py | 44 +++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/server/tests/db/access/test_matches_dao.py b/server/tests/db/access/test_matches_dao.py index 1d58740c..d80c1d61 100644 --- a/server/tests/db/access/test_matches_dao.py +++ b/server/tests/db/access/test_matches_dao.py @@ -64,7 +64,7 @@ def assert_file_set(resp: FileMatchesResult, expected): assert actual == expected -def test_list_file_matches_multiple_paths(database: Database): +def test_list_file_matches_hops(database: Database): with database.session_scope(expunge=True) as session: # Create files source = make_file() @@ -144,3 +144,45 @@ def test_list_file_matches_filter_distance(database: Database): req = FileMatchesRequest(file=source, hops=4, min_distance=long) resp = MatchesDAO.list_file_matches(req, session) assert_file_set(resp, expected=[source, b1, b2, b3, b4]) + + +def test_list_file_matches_filter_cycles(database: Database): + hops = 100 + with database.session_scope(expunge=True) as session: + source = make_file() + linked = make_files(2) + prev1, prev2 = linked + session.add_all([source, link(source, prev1), link(source, prev2)]) + + for _ in range(hops - 1): + cur1, cur2 = make_files(2) + session.add_all([ + link(prev1, cur1), link(prev1, cur2), + link(cur2, prev2), link(cur1, prev2)]) + linked.append(cur1) + linked.append(cur2) + prev1, prev2 = cur1, cur2 + + # Query all + with database.session_scope(expunge=True) as session: + req = FileMatchesRequest(file=source, hops=hops) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source] + linked) + + # Query half + with database.session_scope(expunge=True) as session: + half = int(hops / 2) + req = FileMatchesRequest(file=source, hops=half) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source] + linked[:2 * half]) + + # Short 
cut the most distant items + with database.session_scope(expunge=True) as session: + session.add_all([link(source, cur1), link(source, cur2)]) + + # Query half hops must return all files now + with database.session_scope(expunge=True) as session: + half = int(hops / 2) + req = FileMatchesRequest(file=source, hops=half) + resp = MatchesDAO.list_file_matches(req, session) + assert_file_set(resp, expected=[source] + linked) From f17623b84df970a051840509680f7286671e7a22 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Tue, 20 Oct 2020 15:34:00 +0700 Subject: [PATCH 11/34] Test match loading --- server/tests/db/access/test_matches_dao.py | 57 +++++++++++++++------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/server/tests/db/access/test_matches_dao.py b/server/tests/db/access/test_matches_dao.py index d80c1d61..04fbf51c 100644 --- a/server/tests/db/access/test_matches_dao.py +++ b/server/tests/db/access/test_matches_dao.py @@ -57,11 +57,11 @@ def chunks(iterable, size=100): chunk = list(itertools.islice(iterator, size)) -def assert_file_set(resp: FileMatchesResult, expected): - """Check result file set.""" - expected = {file.id for file in expected} - actual = {file.id for file in resp.files} - assert actual == expected +def assert_same(actual, expected): + """Check result id set.""" + expected_ids = {entity.id for entity in expected} + actual_ids = {entity.id for entity in actual} + assert actual_ids == expected_ids def test_list_file_matches_hops(database: Database): @@ -85,27 +85,27 @@ def test_list_file_matches_hops(database: Database): with database.session_scope() as session: req = FileMatchesRequest(file=source, hops=0) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source]) + assert_same(resp.files, expected=[source]) with database.session_scope() as session: req = FileMatchesRequest(file=source, hops=1) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source, a1, b1]) + 
assert_same(resp.files, expected=[source, a1, b1]) with database.session_scope() as session: req = FileMatchesRequest(file=source, hops=2) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source, a1, a2, b1, b2]) + assert_same(resp.files, expected=[source, a1, a2, b1, b2]) with database.session_scope() as session: req = FileMatchesRequest(file=source, hops=3) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source, a1, a2, a3, b1, b2, b3]) + assert_same(resp.files, expected=[source, a1, a2, a3, b1, b2, b3]) with database.session_scope(expunge=True) as session: req = FileMatchesRequest(file=source, hops=4) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source, a1, a2, a3, a4, b1, b2, b3, b4]) + assert_same(resp.files, expected=[source, a1, a2, a3, a4, b1, b2, b3, b4]) def test_list_file_matches_filter_distance(database: Database): @@ -131,19 +131,19 @@ def test_list_file_matches_filter_distance(database: Database): with database.session_scope(expunge=True) as session: req = FileMatchesRequest(file=source, hops=4) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source, a1, a2, a3, a4, b1, b2, b3, b4]) + assert_same(resp.files, expected=[source, a1, a2, a3, a4, b1, b2, b3, b4]) # Query short with database.session_scope(expunge=True) as session: req = FileMatchesRequest(file=source, hops=4, max_distance=short) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source, a1, a2, a3, a4]) + assert_same(resp.files, expected=[source, a1, a2, a3, a4]) # Query long with database.session_scope(expunge=True) as session: req = FileMatchesRequest(file=source, hops=4, min_distance=long) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source, b1, b2, b3, b4]) + assert_same(resp.files, expected=[source, b1, b2, b3, b4]) def test_list_file_matches_filter_cycles(database: 
Database): @@ -167,16 +167,16 @@ def test_list_file_matches_filter_cycles(database: Database): with database.session_scope(expunge=True) as session: req = FileMatchesRequest(file=source, hops=hops) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source] + linked) + assert_same(resp.files, expected=[source] + linked) # Query half with database.session_scope(expunge=True) as session: half = int(hops / 2) req = FileMatchesRequest(file=source, hops=half) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source] + linked[:2 * half]) + assert_same(resp.files, expected=[source] + linked[:2 * half]) - # Short cut the most distant items + # Create a short cut from the source to the most distant items with database.session_scope(expunge=True) as session: session.add_all([link(source, cur1), link(source, cur2)]) @@ -185,4 +185,27 @@ def test_list_file_matches_filter_cycles(database: Database): half = int(hops / 2) req = FileMatchesRequest(file=source, hops=half) resp = MatchesDAO.list_file_matches(req, session) - assert_file_set(resp, expected=[source] + linked) + assert_same(resp.files, expected=[source] + linked) + + +def test_list_file_matches_links(database: Database): + with database.session_scope(expunge=True) as session: + source = make_file() + a, b, c = make_files(3) + close_links = [link(source, a), link(source, b), link(a, b)] + far_links = [link(a, c), link(b, c)] + session.add_all(close_links + far_links) + + # Query close links + with database.session_scope(expunge=True) as session: + req = FileMatchesRequest(file=source, hops=1) + resp = MatchesDAO.list_file_matches(req, session) + assert_same(resp.files, expected=[source, a, b]) + assert_same(resp.matches, expected=close_links) + + # Query all links + with database.session_scope(expunge=True) as session: + req = FileMatchesRequest(file=source, hops=2) + resp = MatchesDAO.list_file_matches(req, session) + assert_same(resp.files, 
expected=[source, a, b, c]) + assert_same(resp.matches, expected=close_links + far_links) From 9a447552489d0a8ae49c295072eb90c04ed01523 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Wed, 21 Oct 2020 21:00:11 +0700 Subject: [PATCH 12/34] Emulate matches pagination --- db/access/matches.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/db/access/matches.py b/db/access/matches.py index c301d5eb..5ca25463 100644 --- a/db/access/matches.py +++ b/db/access/matches.py @@ -34,6 +34,7 @@ class FileMatchesResult: """List single file's matches results.""" files: List[Files] matches: List[Matches] + total: int class MatchesDAO: @@ -68,7 +69,10 @@ def list_file_matches(req: FileMatchesRequest, session) -> FileMatchesResult: seen.update(next_step) current_step = next_step matches = MatchesDAO._extract_matches(files, file_ids=seen) - return FileMatchesResult(files=files, matches=matches) + + # Slice result set + matches, files, total = MatchesDAO._slice_results(req.file, matches, offset=req.offset, limit=req.limit) + return FileMatchesResult(files=files, matches=matches, total=total) @staticmethod def _populate_next_step(file, seen, next_step): @@ -92,14 +96,16 @@ def _preload_file_attrs(query, preload): @staticmethod def _extract_matches(files, file_ids): """Build matches list.""" - matches = [] + matches = set() for file in files: for match in file.target_matches: if match.match_video_file_id in file_ids and match.query_video_file_id in file_ids: - matches.append(match) + matches.add(match) for match in file.source_matches: if match.match_video_file_id in file_ids and match.query_video_file_id in file_ids: - matches.append(match) + matches.add(match) + matches = list(matches) + matches.sort(key=lambda item: item.id) return matches @staticmethod @@ -118,3 +124,19 @@ def _join_matches(query, req): (incoming.distance <= req.max_distance)). \ options(contains_eager(Files.source_matches, alias=outgoing)). 
\ options(contains_eager(Files.target_matches, alias=incoming)) + + @staticmethod + def _slice_results(start_file, matches, offset, limit): + """Extract requested slice from matches.""" + # Slice matches + total = len(matches) + matches = sorted(matches, key=lambda item: item.id) + matches = matches[offset:offset + limit] + + # Get the corresponding files + files = {start_file} + for match in matches: + files.add(match.match_video_file) + files.add(match.query_video_file) + files = sorted(list(files), key=lambda item: item.id) + return matches, files, total From a8e2e36153946448b86e7419613338fcbbd41c2b Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Wed, 21 Oct 2020 21:14:14 +0700 Subject: [PATCH 13/34] Update MatchesDAO tests --- server/tests/db/access/test_matches_dao.py | 33 ++++++++++++---------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/server/tests/db/access/test_matches_dao.py b/server/tests/db/access/test_matches_dao.py index 04fbf51c..c8f8dc23 100644 --- a/server/tests/db/access/test_matches_dao.py +++ b/server/tests/db/access/test_matches_dao.py @@ -4,7 +4,7 @@ import pytest from db import Database -from db.access.matches import MatchesDAO, FileMatchesRequest, FileMatchesResult +from db.access.matches import MatchesDAO, FileMatchesRequest from db.schema import Files, Exif, VideoMetadata, Scene, Matches @@ -77,33 +77,34 @@ def test_list_file_matches_hops(database: Database): # Link files a1, a2, a3, a4 = path_a b1, b2, b3, b4 = path_b - session.add_all([ + all_links = [ link(source, a1), link(a2, a1), link(a2, a3), link(a4, a3), link(b1, source), link(b1, b2), link(b2, b3), link(b4, b3), - ]) + ] + session.add_all(all_links) with database.session_scope() as session: - req = FileMatchesRequest(file=source, hops=0) + req = FileMatchesRequest(file=source, hops=0, limit=len(all_links)) resp = MatchesDAO.list_file_matches(req, session) assert_same(resp.files, expected=[source]) with database.session_scope() as session: - req = 
FileMatchesRequest(file=source, hops=1) + req = FileMatchesRequest(file=source, hops=1, limit=len(all_links)) resp = MatchesDAO.list_file_matches(req, session) assert_same(resp.files, expected=[source, a1, b1]) with database.session_scope() as session: - req = FileMatchesRequest(file=source, hops=2) + req = FileMatchesRequest(file=source, hops=2, limit=len(all_links)) resp = MatchesDAO.list_file_matches(req, session) assert_same(resp.files, expected=[source, a1, a2, b1, b2]) with database.session_scope() as session: - req = FileMatchesRequest(file=source, hops=3) + req = FileMatchesRequest(file=source, hops=3, limit=len(all_links)) resp = MatchesDAO.list_file_matches(req, session) assert_same(resp.files, expected=[source, a1, a2, a3, b1, b2, b3]) with database.session_scope(expunge=True) as session: - req = FileMatchesRequest(file=source, hops=4) + req = FileMatchesRequest(file=source, hops=4, limit=len(all_links)) resp = MatchesDAO.list_file_matches(req, session) assert_same(resp.files, expected=[source, a1, a2, a3, a4, b1, b2, b3, b4]) @@ -152,27 +153,28 @@ def test_list_file_matches_filter_cycles(database: Database): source = make_file() linked = make_files(2) prev1, prev2 = linked - session.add_all([source, link(source, prev1), link(source, prev2)]) + links = [link(source, prev1), link(source, prev2)] for _ in range(hops - 1): cur1, cur2 = make_files(2) - session.add_all([ + links.extend([ link(prev1, cur1), link(prev1, cur2), link(cur2, prev2), link(cur1, prev2)]) linked.append(cur1) linked.append(cur2) prev1, prev2 = cur1, cur2 + session.add_all(links) # Query all with database.session_scope(expunge=True) as session: - req = FileMatchesRequest(file=source, hops=hops) + req = FileMatchesRequest(file=source, hops=hops, limit=len(links)) resp = MatchesDAO.list_file_matches(req, session) assert_same(resp.files, expected=[source] + linked) # Query half with database.session_scope(expunge=True) as session: half = int(hops / 2) - req = 
FileMatchesRequest(file=source, hops=half) + req = FileMatchesRequest(file=source, hops=half, limit=len(links)) resp = MatchesDAO.list_file_matches(req, session) assert_same(resp.files, expected=[source] + linked[:2 * half]) @@ -183,7 +185,7 @@ def test_list_file_matches_filter_cycles(database: Database): # Query half hops must return all files now with database.session_scope(expunge=True) as session: half = int(hops / 2) - req = FileMatchesRequest(file=source, hops=half) + req = FileMatchesRequest(file=source, hops=half, limit=len(links)) resp = MatchesDAO.list_file_matches(req, session) assert_same(resp.files, expected=[source] + linked) @@ -195,17 +197,18 @@ def test_list_file_matches_links(database: Database): close_links = [link(source, a), link(source, b), link(a, b)] far_links = [link(a, c), link(b, c)] session.add_all(close_links + far_links) + total_links = len(far_links) + len(close_links) # Query close links with database.session_scope(expunge=True) as session: - req = FileMatchesRequest(file=source, hops=1) + req = FileMatchesRequest(file=source, hops=1, limit=total_links) resp = MatchesDAO.list_file_matches(req, session) assert_same(resp.files, expected=[source, a, b]) assert_same(resp.matches, expected=close_links) # Query all links with database.session_scope(expunge=True) as session: - req = FileMatchesRequest(file=source, hops=2) + req = FileMatchesRequest(file=source, hops=2, limit=total_links) resp = MatchesDAO.list_file_matches(req, session) assert_same(resp.files, expected=[source, a, b, c]) assert_same(resp.matches, expected=close_links + far_links) From 6ce85727edae6d2b67b8e10997621d444247b25c Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Wed, 21 Oct 2020 21:14:54 +0700 Subject: [PATCH 14/34] Migrate server.api to MatchesDAO --- server/server/api/files.py | 2 +- server/server/api/matches.py | 59 +++++++++++++++++++----------------- server/server/model.py | 15 +++++++++ 3 files changed, 48 insertions(+), 28 deletions(-) diff --git 
a/server/server/api/files.py b/server/server/api/files.py index 86df0e6d..3b2970f1 100644 --- a/server/server/api/files.py +++ b/server/server/api/files.py @@ -62,7 +62,7 @@ def get_file(file_id): # Fetch file from database query = database.session.query(Files) query = FILE_FIELDS.preload(query, extra_fields) - file = query.filter(Files.id == file_id).first() + file = query.get(file_id) # Handle file not found if file is None: diff --git a/server/server/api/matches.py b/server/server/api/matches.py index 2aab3717..abfc9706 100644 --- a/server/server/api/matches.py +++ b/server/server/api/matches.py @@ -1,39 +1,44 @@ -from flask import jsonify, request -from sqlalchemy.orm import joinedload +from http import HTTPStatus -from db.access.files import FilesDAO -from db.schema import Matches, Files +from flask import jsonify, request, abort + +from db.access.matches import FileMatchesRequest, MatchesDAO +from db.schema import Files from .blueprint import api -from .helpers import parse_positive_int, Fields, parse_enum_seq +from .helpers import parse_positive_int, Fields, parse_positive_float, parse_fields from ..model import Transform, database # Optional file fields FILE_FIELDS = Fields(Files.exif, Files.signature, Files.meta, Files.scenes) +def parse_params(file): + """Parse request parameters.""" + req = FileMatchesRequest(file=file) + req.limit = parse_positive_int(request.args, 'limit', 20) + req.offset = parse_positive_int(request.args, 'offset', 0) + req.hops = parse_positive_int(request.args, 'hops', 1) + req.min_distance = parse_positive_float(request.args, 'min_distance', 0.0) + req.max_distance = parse_positive_float(request.args, 'max_distance', 1.0) + req.preload = parse_fields(request.args, 'include', FILE_FIELDS) + return req + + @api.route('/files//matches', methods=['GET']) def list_file_matches(file_id): - limit = parse_positive_int(request.args, 'limit', 20) - offset = parse_positive_int(request.args, 'offset', 0) - # hops = 
parse_positive_int(request.args, "hops", 1) - # min_distance = parse_positive_float(request.args, '') - include_fields = parse_enum_seq(request.args, 'include', values=FILE_FIELDS.names, default=()) - - query = FilesDAO.file_matches(file_id, database.session).options( - joinedload(Matches.match_video_file), - joinedload(Matches.query_video_file) - ) - - # Preload file fields - query = FILE_FIELDS.preload(query, include_fields, Matches.match_video_file) - query = FILE_FIELDS.preload(query, include_fields, Matches.query_video_file) - - # Get requested slice - total = query.count() - items = query.offset(offset).limit(limit).all() - - include_flags = {field: True for field in include_fields} + file = database.session.query(Files).get(file_id) + + # Handle file not found + if file is None: + abort(HTTPStatus.NOT_FOUND.value, f"File id not found: {file_id}") + + req = parse_params(file) + resp = MatchesDAO.list_file_matches(req, database.session) + + include_flags = {field.key: True for field in req.preload} return jsonify({ - 'items': [Transform.file_match_dict(item, file_id, **include_flags) for item in items], - 'total': total + 'files': [Transform.file_dict(file, **include_flags) for file in resp.files], + 'matches': [Transform.match_dict(match) for match in resp.matches], + 'total': resp.total, + 'hops': req.hops, }) diff --git a/server/server/model.py b/server/server/model.py index 527cad31..613bd7eb 100644 --- a/server/server/model.py +++ b/server/server/model.py @@ -48,6 +48,7 @@ class Transform: @staticmethod @serializable def file_dict(file, *, meta=False, signature=False, scenes=False, exif=False): + """Get plain data representation for single file.""" data = { "id": file.id, "file_path": file.file_path, @@ -67,6 +68,7 @@ def file_dict(file, *, meta=False, signature=False, scenes=False, exif=False): @staticmethod @serializable def metadata_dict(meta): + """Get plain data representation for VideoMetadata.""" fields = entity_fields(meta) fields -= {"id", 
"file_id", "file"} return {field: getattr(meta, field) for field in fields} @@ -74,6 +76,7 @@ def metadata_dict(meta): @staticmethod @serializable def scene_dict(scene, file=False): + """Get plain data representation for single Scene.""" data = { "id": scene.id, "duration": scene.duration, @@ -86,6 +89,7 @@ def scene_dict(scene, file=False): @staticmethod @serializable def exif_dict(exif): + """Get plain data representation for Exif.""" fields = entity_fields(exif) fields -= {"id", "file_id", "file", "Json_full_exif"} return {field: getattr(exif, field) for field in fields} @@ -93,6 +97,7 @@ def exif_dict(exif): @staticmethod @serializable def file_match_dict(match, file_id, *, meta=False, signature=False, scenes=False, exif=False): + """Get plain data representation for single file match.""" if match.query_video_file.id != file_id: matched = match.query_video_file else: @@ -101,3 +106,13 @@ def file_match_dict(match, file_id, *, meta=False, signature=False, scenes=False "distance": match.distance, "file": Transform.file_dict(matched, meta=meta, signature=signature, scenes=scenes, exif=exif) } + + @staticmethod + @serializable + def match_dict(match): + """Get plain data representation for Match.""" + return { + "distance": match.distance, + "source": match.query_video_file_id, + "target": match.match_video_file_id + } From 4c3a7dcde4f8c9daf68cfe6287c52ce2b0960c14 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Wed, 21 Oct 2020 21:15:15 +0700 Subject: [PATCH 15/34] Update api/matches tests --- server/tests/server/test_api.py | 41 ++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/server/tests/server/test_api.py b/server/tests/server/test_api.py index 178f8872..a137480b 100644 --- a/server/tests/server/test_api.py +++ b/server/tests/server/test_api.py @@ -627,15 +627,21 @@ def test_list_file_matches_basic(client, app): ] session.add_all(matches) + all_files = sorted(all_files, key=attr("id")) matches = 
sorted(matches, key=attr("id")) # Get all matches resp = client.get(f"/api/v1/files/{source.id}/matches") assert_json_response(resp, { "total": len(matches), - "items": [ - {"distance": match.distance, "file": {"id": match.match_video_file_id}} for match in matches - ] + "matches": [ + { + "distance": match.distance, + "source": match.query_video_file_id, + "target": match.match_video_file_id + } for match in matches + ], + "files": [{"file_path": file.file_path, "sha256": file.sha256} for file in all_files] }) # Get slice @@ -644,13 +650,20 @@ def test_list_file_matches_basic(client, app): resp = client.get(f"/api/v1/files/{source.id}/matches?offset={offset}&limit={limit}") assert_json_response(resp, { "total": len(matches), - "items": [ + "matches": [ { "distance": match.distance, - "file": {"id": match.match_video_file_id} + "source": match.query_video_file_id, + "target": match.match_video_file_id } for match in matches[offset:offset + limit] - ] + ], }) + payload = json_payload(resp) + expected_file_ids = {match.query_video_file_id for match in matches[offset:offset + limit]} + expected_file_ids |= {match.match_video_file_id for match in matches[offset:offset + limit]} + actual_file_ids = {file["id"] for file in payload["files"]} + assert actual_file_ids == expected_file_ids + assert len(payload["files"]) def test_list_file_matches_include(client, app): @@ -664,27 +677,27 @@ def test_list_file_matches_include(client, app): ] session.add_all(matches) + files = sorted([source, a, b], key=attr("id")) matches = sorted(matches, key=attr("id")) # Don't include additional fields resp = client.get(f"/api/v1/files/{source.id}/matches") assert all( - {"exif", "meta", "scenes"}.isdisjoint(match["file"].keys()) for match in items(resp) + {"exif", "meta", "scenes"}.isdisjoint(file.keys()) for file in json_payload(resp)["files"] ) # Include meta and exif resp = client.get(f"/api/v1/files/{source.id}/matches?include=meta,exif") assert_json_response(resp, { "total": 
len(matches), - "items": [ + "files": [ { - "file": { - "meta": {"video_length": match.match_video_file.meta.video_length}, - "exif": {"General_FileExtension": match.match_video_file.exif.General_FileExtension} - } - } for match in matches + "meta": {"video_length": file.meta.video_length}, + "exif": {"General_FileExtension": file.exif.General_FileExtension} + + } for file in files ] }) assert all( - "scenes" not in match["file"].keys() for match in items(resp) + "scenes" not in file.keys() for file in json_payload(resp)["files"] ) From ba56b8c7340f632903129e8c1c3e0ee2fa9c4218 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Wed, 21 Oct 2020 21:47:17 +0700 Subject: [PATCH 16/34] Test multiple hops with cycles --- server/server/model.py | 1 + server/tests/server/test_api.py | 75 ++++++++++++++++++++++++++++++--- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/server/server/model.py b/server/server/model.py index 613bd7eb..d72635cf 100644 --- a/server/server/model.py +++ b/server/server/model.py @@ -112,6 +112,7 @@ def file_match_dict(match, file_id, *, meta=False, signature=False, scenes=False def match_dict(match): """Get plain data representation for Match.""" return { + "id": match.id, "distance": match.distance, "source": match.query_video_file_id, "target": match.match_video_file_id diff --git a/server/tests/server/test_api.py b/server/tests/server/test_api.py index a137480b..0830a294 100644 --- a/server/tests/server/test_api.py +++ b/server/tests/server/test_api.py @@ -88,6 +88,30 @@ def assert_files(resp, expected, total=None, related=None, duplicates=None, uniq assert_json_response(resp, expected_shape) +def matched_files(matches): + """Get files of the given matches.""" + files = set() + for match in matches: + files.add(match.query_video_file) + files.add(match.match_video_file) + return files + + +def refresh(session, *entities): + """Refresh entities from the current session.""" + if not entities: + return [] + return 
[session.query(entity.__class__).get(entity.id) for entity in entities] + + +def assert_same(actual, expected): + """Assert actual payload items refers to expected entities.""" + actual_ids = {item["id"] for item in actual} + expected_ids = {entity.id for entity in expected} + assert actual_ids == expected_ids + assert len(actual) == len(expected) + + def make_file(prefix="", length=42, ext="flv", audio=True, date=datetime.date(2000, 1, 1), scenes=((0, 1), (1, 2))): """Create unique file.""" @@ -659,11 +683,7 @@ def test_list_file_matches_basic(client, app): ], }) payload = json_payload(resp) - expected_file_ids = {match.query_video_file_id for match in matches[offset:offset + limit]} - expected_file_ids |= {match.match_video_file_id for match in matches[offset:offset + limit]} - actual_file_ids = {file["id"] for file in payload["files"]} - assert actual_file_ids == expected_file_ids - assert len(payload["files"]) + assert_same(payload["files"], matched_files(matches[offset:offset + limit])) def test_list_file_matches_include(client, app): @@ -701,3 +721,48 @@ def test_list_file_matches_include(client, app): assert all( "scenes" not in file.keys() for file in json_payload(resp)["files"] ) + + +def test_list_file_matches_hops(client, app): + hops = 100 + with session_scope(app) as session: + source = make_file() + linked = make_files(2) + prev1, prev2 = linked + matches = [link(source, prev1), link(source, prev2)] + + for _ in range(hops - 1): + cur1, cur2 = make_files(2) + matches.extend([ + link(prev1, cur1), link(prev1, cur2), + link(cur2, prev2), link(cur1, prev2)]) + linked.append(cur1) + linked.append(cur2) + prev1, prev2 = cur1, cur2 + session.add_all(matches) + + matches.sort(key=attr("id")) + + # Query all + resp = client.get(f"/api/v1/files/{source.id}/matches?hops={hops}&limit={len(matches)}") + payload = json_payload(resp) + assert_same(payload["matches"], matches) + assert_same(payload["files"], [source] + linked) + + # Query half + half = int(hops / 2) 
+ resp = client.get(f"/api/v1/files/{source.id}/matches?hops={half}&limit={len(matches)}") + assert_same(json_payload(resp)["files"], [source] + linked[:2 * half]) + + # Create a short cut from the source to the most distant items + with session_scope(app) as session: + source, cur1, cur2 = refresh(session, source, cur1, cur2) + short_cut = [link(source, cur1), link(source, cur2)] + session.add_all(short_cut) + matches.extend(short_cut) + + # Query half hops must return all files now + resp = client.get(f"/api/v1/files/{source.id}/matches?hops={half}&limit={len(matches) + 2}") + payload = json_payload(resp) + assert_same(payload["matches"], matches) + assert_same(payload["files"], [source] + linked) From 9a337104096959dad9e9b0ae0a48fd108982063e Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Thu, 22 Oct 2020 00:01:28 +0700 Subject: [PATCH 17/34] Update file matches page --- .../FileMatchesPage/FileMatchesPage.js | 32 +++++++++-- .../MatchPreview/MatchPreview.js | 16 +++--- web/src/collection/state/actions.js | 33 +++++++++++- web/src/collection/state/reducers.js | 53 +++++++++++++++---- web/src/collection/state/sagas.js | 4 +- web/src/server-api/Server/Transform.js | 8 ++- 6 files changed, 117 insertions(+), 29 deletions(-) diff --git a/web/src/collection/components/FileMatchesPage/FileMatchesPage.js b/web/src/collection/components/FileMatchesPage/FileMatchesPage.js index 7e459a40..67157242 100644 --- a/web/src/collection/components/FileMatchesPage/FileMatchesPage.js +++ b/web/src/collection/components/FileMatchesPage/FileMatchesPage.js @@ -56,18 +56,37 @@ function useMessages(matchesCount) { }; } +function isIncident(id) { + return (match) => match.source === id || match.target === id; +} + +function getMatchedFile(match, files, id) { + if (match.source === id) { + return files[match.target]; + } else if (match.target === id) { + return files[match.source]; + } else { + throw Error( + `Match ${JSON.stringify(match)} is not incident to file id ${id}` + ); + } 
+} + function FileMatchesPage(props) { const { className } = props; const classes = useStyles(); - const { id } = useParams(); + const { id: rawId } = useParams(); + const id = Number(rawId); const { file, error, loadFile } = useFile(id); const messages = useMessages((file && file.matchesCount) || 0); const [view, setView] = useState(View.grid); - const matches = useSelector(selectFileMatches).matches; + const matchesState = useSelector(selectFileMatches); + const matches = matchesState.matches.filter(isIncident(id)); + const files = matchesState.files; const dispatch = useDispatch(); useEffect(() => { - dispatch(updateFileMatchFilters(id, {})); + dispatch(updateFileMatchFilters(id, { hops: 1 })); }, [id]); if (file == null) { @@ -118,8 +137,11 @@ function FileMatchesPage(props) { > {matches.map((match) => ( - - + + ))} diff --git a/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js b/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js index 8d6fc16e..b5684292 100644 --- a/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js +++ b/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js @@ -14,6 +14,7 @@ import { useIntl } from "react-intl"; import ButtonBase from "@material-ui/core/ButtonBase"; import { useHistory } from "react-router-dom"; import { routes } from "../../../../routing/routes"; +import FileType from "../../FileBrowserPage/FileType"; const useStyles = makeStyles((theme) => ({ root: { @@ -102,8 +103,7 @@ function useMessages(file) { } function MatchPreview(props) { - const { match, highlight, className } = props; - const file = match.file; + const { file, distance, highlight, className } = props; const intl = useIntl(); const classes = useStyles(); const messages = useMessages(file); @@ -111,7 +111,7 @@ function MatchPreview(props) { const handleMoreInfo = useCallback( () => history.push(routes.collection.fileURL(file.id)), - [match] + [file.id] ); return ( @@ 
-139,7 +139,7 @@ function MatchPreview(props) {
- +
file LRU cache */ fileCache: { - maxSize: 100, + maxSize: 1000, files: {}, history: [], }, @@ -54,21 +57,28 @@ export const initialState = { limit: 100, offset: 0, matches: [], + files: {}, }, }; -function filenames(files) { +function ids(entities) { const result = new Set(); - for (let file of files) { - result.add(file.filename); + for (let entity of entities) { + result.add(entity.id); } return result; } -function extendFiles(existing, loaded) { - const existingNames = filenames(existing); - const newFiles = loaded.filter((item) => !existingNames.has(item.filename)); - return [...existing, ...newFiles]; +function extendEntityList(existing, loaded) { + const existingIds = ids(existing); + const newEntities = loaded.filter((item) => !existingIds.has(item.id)); + return [...existing, ...newEntities]; +} + +function extendEntityMap(existing, loaded) { + const result = { ...existing }; + loaded.forEach((entity) => (result[entity.id] = entity)); + return result; } function fileCacheReducer(state = initialState.fileCache, action) { @@ -97,6 +107,7 @@ function fileMatchesReducer(state = initialState.fileMatches, action) { ...state, filters: { ...state.filters, ...action.filters, fileId: action.fileId }, matches: [], + files: {}, loading: true, }; case ACTION_UPDATE_FILE_MATCH_FILTERS_SUCCESS: @@ -104,16 +115,37 @@ function fileMatchesReducer(state = initialState.fileMatches, action) { ...state, total: action.total, matches: [...action.matches], + files: extendEntityMap({}, action.files), error: false, loading: false, }; case ACTION_UPDATE_FILE_MATCH_FILTERS_FAILURE: return { matches: [], + files: {}, total: 0, error: true, loading: false, }; + case ACTION_FETCH_FILE_MATCHES: + return { + ...state, + loading: true, + }; + case ACTION_FETCH_FILE_MATCHES_SUCCESS: + return { + ...state, + total: action.total, + matches: extendEntityList(state.matches, action.matches), + files: extendEntityMap(state.files, action.files), + error: false, + loading: false, + }; + case 
ACTION_FETCH_FILE_MATCHES_FAILURE: + return { + error: true, + loading: false, + }; default: return state; } @@ -152,7 +184,7 @@ export function collRootReducer(state = initialState, action) { return { ...state, error: false, - files: extendFiles(state.files, action.files), + files: extendEntityList(state.files, action.files), counts: { ...action.counts }, loading: false, }; @@ -170,6 +202,9 @@ export function collRootReducer(state = initialState, action) { case ACTION_UPDATE_FILE_MATCH_FILTERS: case ACTION_UPDATE_FILE_MATCH_FILTERS_SUCCESS: case ACTION_UPDATE_FILE_MATCH_FILTERS_FAILURE: + case ACTION_FETCH_FILE_MATCHES: + case ACTION_FETCH_FILE_MATCHES_SUCCESS: + case ACTION_FETCH_FILE_MATCHES_FAILURE: return { ...state, fileMatches: fileMatchesReducer(state.fileMatches, action), diff --git a/web/src/collection/state/sagas.js b/web/src/collection/state/sagas.js index b7464633..3aed104b 100644 --- a/web/src/collection/state/sagas.js +++ b/web/src/collection/state/sagas.js @@ -32,8 +32,8 @@ function* fetchFileMatchesSaga(server, action) { } // Update state - const { total, matches } = resp.data; - yield put(updateFileMatchFiltersSuccess(matches, total)); + const { total, matches, files } = resp.data; + yield put(updateFileMatchFiltersSuccess(matches, files, total)); } catch (error) { console.error(error); yield put(updateFileMatchFiltersFailure(error)); diff --git a/web/src/server-api/Server/Transform.js b/web/src/server-api/Server/Transform.js index 9cc7d95e..6613133c 100644 --- a/web/src/server-api/Server/Transform.js +++ b/web/src/server-api/Server/Transform.js @@ -106,14 +106,12 @@ export default class Transform { fetchFileMatchesResults(data) { return { total: data.total, - matches: data.items.map((match) => this.fileMatch(match)), + matches: data.matches.map((match) => this.fileMatch(match)), + files: data.files.map((file) => this.videoFile(file)), }; } fileMatch(match) { - return { - distance: match.distance, - file: this.videoFile(match.file), - }; + return 
{ ...match }; // No difference at the moment } } From 682f353a9ee0a63de69370a154280dbfe8fbc69e Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Thu, 22 Oct 2020 00:13:02 +0700 Subject: [PATCH 18/34] Update cluster page to consume new matches format --- .../FileClusterPage/FileClusterPage.js | 13 +++++++--- .../components/FileMatchesPage/MatchType.js | 4 ++- .../components/MatchGraph/MatchGraph.js | 26 ++++++++++++++----- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/web/src/collection/components/FileClusterPage/FileClusterPage.js b/web/src/collection/components/FileClusterPage/FileClusterPage.js index 614bc8cf..79fd3383 100644 --- a/web/src/collection/components/FileClusterPage/FileClusterPage.js +++ b/web/src/collection/components/FileClusterPage/FileClusterPage.js @@ -34,11 +34,13 @@ function FileClusterPage(props) { const classes = useStyles(); const { id } = useParams(); const { file, error, loadFile } = useFile(id); - const matches = useSelector(selectFileMatches).matches; + const matchesState = useSelector(selectFileMatches); + const matches = matchesState.matches; + const files = matchesState.files; const dispatch = useDispatch(); useEffect(() => { - dispatch(updateFileMatchFilters(id, {})); + dispatch(updateFileMatchFilters(id, { hops: 2 })); }, [id]); if (file == null) { @@ -58,7 +60,12 @@ function FileClusterPage(props) {
- +
); } diff --git a/web/src/collection/components/FileMatchesPage/MatchType.js b/web/src/collection/components/FileMatchesPage/MatchType.js index 191fda83..71e03958 100644 --- a/web/src/collection/components/FileMatchesPage/MatchType.js +++ b/web/src/collection/components/FileMatchesPage/MatchType.js @@ -5,7 +5,9 @@ import FileType from "../FileBrowserPage/FileType"; * Prop-type for a match between two files. */ export const MatchType = PropTypes.shape({ - file: FileType.isRequired, + id: PropTypes.number.isRequired, + source: PropTypes.number.isRequired, + target: PropTypes.number.isRequired, distance: PropTypes.number.isRequired, }); diff --git a/web/src/collection/components/MatchGraph/MatchGraph.js b/web/src/collection/components/MatchGraph/MatchGraph.js index 51ef0c4b..be8424a1 100644 --- a/web/src/collection/components/MatchGraph/MatchGraph.js +++ b/web/src/collection/components/MatchGraph/MatchGraph.js @@ -22,8 +22,8 @@ const useStyles = makeStyles(() => ({ */ function getLinks(source, matches) { return matches.map((match) => ({ - source: source.id, - target: match.file.id, + source: match.source, + target: match.target, value: 10 * (1 - match.distance), })); } @@ -31,15 +31,17 @@ function getLinks(source, matches) { /** * Get collection of nodes compatible with D3Graph */ -function getNodes(source, matches) { +function getNodes(source, files) { return [ - { id: source.id, group: 2 }, - ...matches.map((match) => ({ id: match.file.id, group: 1 })), + ...Object.values(files).map((file) => ({ + id: file.id, + group: file.id === source.id ? 
2 : 1, + })), ]; } function MatchGraph(props) { - const { source, matches, className } = props; + const { source, matches, files, className } = props; const classes = useStyles(); const ref = useRef(null); @@ -47,7 +49,7 @@ function MatchGraph(props) { if (ref.current != null) { const graph = new D3Graph({ links: getLinks(source, matches), - nodes: getNodes(source, matches), + nodes: getNodes(source, files), container: ref.current, classes: { content: classes.content }, }); @@ -63,8 +65,18 @@ function MatchGraph(props) { } MatchGraph.propTypes = { + /** + * A initial file for which all similar files were selected + */ source: FileType.isRequired, + /** + * Similarity relationship between files + */ matches: PropTypes.arrayOf(MatchType).isRequired, + /** + * Similar files map + */ + files: PropTypes.object.isRequired, className: PropTypes.string, }; From 1eda4099890af6fb25671a5d149ede291354b165 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Thu, 22 Oct 2020 16:16:25 +0700 Subject: [PATCH 19/34] Improve graph container responsiveness --- .../collection/components/MatchGraph/D3Graph.js | 15 ++++----------- .../components/MatchGraph/MatchGraph.js | 9 ++++----- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/web/src/collection/components/MatchGraph/D3Graph.js b/web/src/collection/components/MatchGraph/D3Graph.js index 547e902b..c03d920c 100644 --- a/web/src/collection/components/MatchGraph/D3Graph.js +++ b/web/src/collection/components/MatchGraph/D3Graph.js @@ -10,18 +10,11 @@ function removeChildren(element) { } export default class D3Graph { - constructor({ - links, - nodes, - container, - width = 500, - height = 500, - classes = {}, - }) { + constructor({ links, nodes, container, classes = {} }) { this.links = links.map(Object.create); this.nodes = nodes.map(Object.create); - this.width = width; - this.height = height; + this.width = container?.clientWidth; + this.height = container?.clientHeight; this.container = container; this.classes = 
classes; } @@ -59,7 +52,7 @@ export default class D3Graph { .selectAll("circle") .data(this.nodes) .join("circle") - .attr("r", 5) + .attr("r", 15) .attr("fill", color) .call(this._createDrag(simulation)); diff --git a/web/src/collection/components/MatchGraph/MatchGraph.js b/web/src/collection/components/MatchGraph/MatchGraph.js index be8424a1..40e056d0 100644 --- a/web/src/collection/components/MatchGraph/MatchGraph.js +++ b/web/src/collection/components/MatchGraph/MatchGraph.js @@ -6,14 +6,13 @@ import D3Graph from "./D3Graph"; import MatchType from "../FileMatchesPage/MatchType"; import FileType from "../FileBrowserPage/FileType"; -const useStyles = makeStyles(() => ({ +const useStyles = makeStyles((theme) => ({ root: { - display: "flex", - alignItems: "center", - justifyContent: "center", + margin: theme.spacing(2), }, content: { - height: 500, + width: "100%", + minHeight: 500, }, })); From c7fc8a9545b6753e239f2d638be43c5861012f40 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Thu, 22 Oct 2020 16:22:41 +0700 Subject: [PATCH 20/34] Fix linting issues --- .../components/FileMatchesPage/MatchPreview/MatchPreview.js | 1 - web/src/collection/components/FileMatchesPage/MatchType.js | 1 - 2 files changed, 2 deletions(-) diff --git a/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js b/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js index b5684292..84ed450e 100644 --- a/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js +++ b/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js @@ -3,7 +3,6 @@ import clsx from "clsx"; import PropTypes from "prop-types"; import { makeStyles } from "@material-ui/styles"; import Paper from "@material-ui/core/Paper"; -import MatchType from "../MatchType"; import VideocamOutlinedIcon from "@material-ui/icons/VideocamOutlined"; import Marked from "../../../../common/components/Marked"; import IconButton from "@material-ui/core/IconButton"; 
diff --git a/web/src/collection/components/FileMatchesPage/MatchType.js b/web/src/collection/components/FileMatchesPage/MatchType.js index 71e03958..a62f5c5f 100644 --- a/web/src/collection/components/FileMatchesPage/MatchType.js +++ b/web/src/collection/components/FileMatchesPage/MatchType.js @@ -1,5 +1,4 @@ import PropTypes from "prop-types"; -import FileType from "../FileBrowserPage/FileType"; /** * Prop-type for a match between two files. From d32e51c6ae8915993f7d1d87ae5fd6c22a91b5c9 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Thu, 22 Oct 2020 19:12:21 +0700 Subject: [PATCH 21/34] Improve graph responsiveness --- .../components/MatchGraph/D3Graph.js | 41 +++++++++++++++++-- .../components/MatchGraph/MatchGraph.js | 11 +++-- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/web/src/collection/components/MatchGraph/D3Graph.js b/web/src/collection/components/MatchGraph/D3Graph.js index c03d920c..8672e055 100644 --- a/web/src/collection/components/MatchGraph/D3Graph.js +++ b/web/src/collection/components/MatchGraph/D3Graph.js @@ -17,6 +17,8 @@ export default class D3Graph { this.height = container?.clientHeight; this.container = container; this.classes = classes; + this.updateSize = null; + this.simulation = null; } /** @@ -26,13 +28,13 @@ export default class D3Graph { const scale = d3.scaleOrdinal(d3.schemeCategory10); const color = (d) => scale(d.group); - const simulation = this._createForceSimulation(); + this.simulation = this._createForceSimulation(); removeChildren(this.container); let svg = d3 .select(this.container) - .attr("preserveAspectRatio", "xMaxYMid meet") + .attr("preserveAspectRatio", "xMidYMid meet") .attr("viewBox", [0, 0, this.width, this.height]) .classed(this.classes.content, true); @@ -54,11 +56,11 @@ export default class D3Graph { .join("circle") .attr("r", 15) .attr("fill", color) - .call(this._createDrag(simulation)); + .call(this._createDrag(this.simulation)); node.append("title").text((d) => d.id); - 
simulation.on("tick", () => { + this.simulation.on("tick", () => { link .attr("x1", (d) => d.source.x) .attr("y1", (d) => d.source.y) @@ -67,6 +69,22 @@ export default class D3Graph { node.attr("cx", (d) => d.x).attr("cy", (d) => d.y); }); + + this.updateSize = () => { + this.width = this.container?.clientWidth; + this.height = this.container?.clientHeight; + svg + .attr("width", this.width) + .attr("height", this.height) + .attr("viewBox", [0, 0, this.width, this.height]) + .classed(this.classes.content, true); + this.simulation.force( + "center", + d3.forceCenter(this.width / 2, this.height / 2) + ); + this.simulation.restart(); + }; + window.addEventListener("resize", this.updateSize); } /** @@ -118,4 +136,19 @@ export default class D3Graph { .force("charge", d3.forceManyBody()) .force("center", d3.forceCenter(this.width / 2, this.height / 2)); } + + /** + * Remove graph elements, remove all listeners, clear container. + */ + cleanup() { + removeChildren(this.container); + if (this.updateSize != null) { + window.removeEventListener("resize", this.updateSize); + this.updateSize = null; + } + if (this.simulation != null) { + this.simulation.stop(); + this.simulation = null; + } + } } diff --git a/web/src/collection/components/MatchGraph/MatchGraph.js b/web/src/collection/components/MatchGraph/MatchGraph.js index 40e056d0..62e374ad 100644 --- a/web/src/collection/components/MatchGraph/MatchGraph.js +++ b/web/src/collection/components/MatchGraph/MatchGraph.js @@ -1,4 +1,4 @@ -import React, { useEffect, useRef } from "react"; +import React, { useEffect, useRef, useState } from "react"; import clsx from "clsx"; import PropTypes from "prop-types"; import { makeStyles } from "@material-ui/styles"; @@ -43,16 +43,21 @@ function MatchGraph(props) { const { source, matches, files, className } = props; const classes = useStyles(); const ref = useRef(null); + const [graph, setGraph] = useState(null); useEffect(() => { if (ref.current != null) { - const graph = new D3Graph({ + 
if (graph != null) { + graph.cleanup(); + } + const newGraph = new D3Graph({ links: getLinks(source, matches), nodes: getNodes(source, files), container: ref.current, classes: { content: classes.content }, }); - graph.display(); + newGraph.display(); + setGraph(newGraph); } }, [ref.current, source, matches]); From 9a4dd5e17fc03ccfc9597765ac047d61417ce435 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 23 Oct 2020 20:28:04 +0700 Subject: [PATCH 22/34] Implement generic loading trigger --- .../components/LoadingTrigger/LoadTrigger.js | 121 ++++++++++++++++++ .../common/components/LoadingTrigger/index.js | 1 + 2 files changed, 122 insertions(+) create mode 100644 web/src/common/components/LoadingTrigger/LoadTrigger.js create mode 100644 web/src/common/components/LoadingTrigger/index.js diff --git a/web/src/common/components/LoadingTrigger/LoadTrigger.js b/web/src/common/components/LoadingTrigger/LoadTrigger.js new file mode 100644 index 00000000..c361e180 --- /dev/null +++ b/web/src/common/components/LoadingTrigger/LoadTrigger.js @@ -0,0 +1,121 @@ +import React, { useCallback } from "react"; +import clsx from "clsx"; +import PropTypes from "prop-types"; +import { makeStyles } from "@material-ui/styles"; +import { useIntl } from "react-intl"; +import VisibilitySensor from "react-visibility-sensor"; +import CircularProgress from "@material-ui/core/CircularProgress"; + +const useStyles = makeStyles((theme) => ({ + trigger: { + display: "flex", + alignItems: "center", + justifyContent: "center", + }, + triggerArea: { + minWidth: 1, + minHeight: 1, + }, + errorMessage: { + display: "flex", + alignItems: "center", + justifyContent: "center", + flexDirection: "column", + ...theme.mixins.title4, + }, + retryLink: { + color: theme.palette.primary.main, + cursor: "pointer", + paddingTop: theme.spacing(1), + }, +})); + +/** + * Get i18n text. 
+ */ +function useMessages() { + const intl = useIntl(); + return { + retry: intl.formatMessage({ id: "actions.retry" }), + }; +} + +/** + * Match loading trigger + */ +function LoadTrigger(props) { + const { + error, + container: Container, + hasMore, + loading, + onLoad, + errorMessage, + className, + ...other + } = props; + const classes = useStyles(); + const messages = useMessages(); + + const handleVisibilityChange = useCallback( + (visible) => { + if (visible && !loading && hasMore) { + onLoad(); + } + }, + [onLoad, loading, hasMore] + ); + + if (!hasMore) { + return null; + } + + return ( + + {!loading && !error && ( + +
+ + )} + {loading && } + {!loading && error && ( +
+ {messages.error} +
+ {messages.retry} +
+
+ )} + + ); +} + +LoadTrigger.propTypes = { + /** + * Indicate loading error + */ + error: PropTypes.bool, + /** + * File loading is in progress + */ + loading: PropTypes.bool.isRequired, + /** + * Trigger loading of the next portion of files + */ + onLoad: PropTypes.func.isRequired, + /** + * Whether more files could be loaded + */ + hasMore: PropTypes.bool.isRequired, + /** + * Container component + */ + container: PropTypes.elementType.isRequired, + /** + * Message displayed when error=true + */ + errorMessage: PropTypes.string.isRequired, + className: PropTypes.string, +}; + +export default LoadTrigger; diff --git a/web/src/common/components/LoadingTrigger/index.js b/web/src/common/components/LoadingTrigger/index.js new file mode 100644 index 00000000..d51b96f7 --- /dev/null +++ b/web/src/common/components/LoadingTrigger/index.js @@ -0,0 +1 @@ +export { default } from "./LoadingTrigger"; From 7f1ec4b35d0fc37c27d44caa8a16de8a9782c801 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 23 Oct 2020 20:42:09 +0700 Subject: [PATCH 23/34] Support matches filtering --- web/src/server-api/Server/Server.js | 6 ++++-- web/src/server-api/Server/helpers.js | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/web/src/server-api/Server/Server.js b/web/src/server-api/Server/Server.js index 87d31da7..7130132c 100644 --- a/web/src/server-api/Server/Server.js +++ b/web/src/server-api/Server/Server.js @@ -2,7 +2,7 @@ import axios from "axios"; import * as HttpStatus from "http-status-codes"; import Transform from "./Transform"; import { Response } from "../Response"; -import { filtersToQueryParams } from "./helpers"; +import { fileFiltersToQueryParams, matchFiltersToQueryParams } from "./helpers"; export default class Server { constructor({ baseURL = "/api/v1", timeout = 10 * 1000, headers = {} } = {}) { @@ -21,7 +21,7 @@ export default class Server { offset, limit, include: ["signature", "meta", "exif"].join(","), - 
...filtersToQueryParams(filters), + ...fileFiltersToQueryParams(filters), }, }); const data = this.transform.fetchFileResults(response.data); @@ -50,6 +50,7 @@ export default class Server { limit = 20, offset = 0, fields = ["meta", "exif"], + filters, }) { try { const response = await this.axios.get(`/files/${id}/matches`, { @@ -57,6 +58,7 @@ export default class Server { limit, offset, include: fields.join(","), + ...matchFiltersToQueryParams(filters), }, }); const data = this.transform.fetchFileMatchesResults(response.data); diff --git a/web/src/server-api/Server/helpers.js b/web/src/server-api/Server/helpers.js index ae30966e..a0a50f04 100644 --- a/web/src/server-api/Server/helpers.js +++ b/web/src/server-api/Server/helpers.js @@ -1,6 +1,6 @@ import { format as formatDate } from "date-fns"; -export function filtersToQueryParams(filters) { +export function fileFiltersToQueryParams(filters) { const params = {}; if (filters.query) { params.path = filters.query; @@ -39,3 +39,17 @@ export function filtersToQueryParams(filters) { } return params; } + +export function matchFiltersToQueryParams(filters) { + const params = {}; + if (filters?.hops != null) { + params.hops = filters.hops; + } + if (filters?.minDistance != null) { + params.min_distance = filters.minDistance; + } + if (filters?.maxDistance != null) { + params.max_distance = filters.maxDistance; + } + return params; +} From 4a3c5c0526996cb82bd6c160b15487e9e8b09451 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 23 Oct 2020 20:43:22 +0700 Subject: [PATCH 24/34] Implement dynamic matches loading --- .../FileMatchesPage/FileMatchesPage.js | 27 +++++++++++- .../FileMatchesPage/MatchPreview/Container.js | 38 +++++++++++++++++ .../MatchPreview/MatchPreview.js | 16 ++++---- web/src/collection/state/reducers.js | 12 ++++-- web/src/collection/state/sagas.js | 41 +++++++++++++++---- web/src/i18n/locales/default.en-US.json | 3 +- 6 files changed, 115 insertions(+), 22 deletions(-) create mode 100644 
web/src/collection/components/FileMatchesPage/MatchPreview/Container.js diff --git a/web/src/collection/components/FileMatchesPage/FileMatchesPage.js b/web/src/collection/components/FileMatchesPage/FileMatchesPage.js index 67157242..e25034df 100644 --- a/web/src/collection/components/FileMatchesPage/FileMatchesPage.js +++ b/web/src/collection/components/FileMatchesPage/FileMatchesPage.js @@ -1,4 +1,4 @@ -import React, { useEffect, useState } from "react"; +import React, { useCallback, useEffect, useState } from "react"; import clsx from "clsx"; import PropTypes from "prop-types"; import { makeStyles } from "@material-ui/styles"; @@ -18,7 +18,8 @@ import useFile from "../../hooks/useFile"; import FileLoadingHeader from "../FileLoadingHeader"; import { useDispatch, useSelector } from "react-redux"; import { selectFileMatches } from "../../state/selectors"; -import { updateFileMatchFilters } from "../../state/actions"; +import { fetchFileMatches, updateFileMatchFilters } from "../../state/actions"; +import LoadTrigger from "../../../common/components/LoadingTrigger/LoadTrigger"; const useStyles = makeStyles((theme) => ({ root: { @@ -41,6 +42,9 @@ const useStyles = makeStyles((theme) => ({ actionButton: { margin: theme.spacing(1.5), }, + trigger: { + minHeight: 250, + }, })); /** @@ -53,6 +57,7 @@ function useMessages(matchesCount) { matched: intl.formatMessage({ id: "file.matched" }, { count: matches }), showFilters: intl.formatMessage({ id: "actions.showFiltersPane" }), searchMatches: intl.formatMessage({ id: "actions.searchMatches" }), + loadError: intl.formatMessage({ id: "match.load.error" }), }; } @@ -89,6 +94,10 @@ function FileMatchesPage(props) { dispatch(updateFileMatchFilters(id, { hops: 1 })); }, [id]); + const handleLoad = useCallback(() => { + dispatch(fetchFileMatches()); + }, [matchesState]); + if (file == null) { return (
@@ -144,6 +153,20 @@ function FileMatchesPage(props) { /> ))} + + +
diff --git a/web/src/collection/components/FileMatchesPage/MatchPreview/Container.js b/web/src/collection/components/FileMatchesPage/MatchPreview/Container.js new file mode 100644 index 00000000..59185932 --- /dev/null +++ b/web/src/collection/components/FileMatchesPage/MatchPreview/Container.js @@ -0,0 +1,38 @@ +import React from "react"; +import clsx from "clsx"; +import PropTypes from "prop-types"; +import { makeStyles } from "@material-ui/styles"; +import Paper from "@material-ui/core/Paper"; + +const useStyles = makeStyles({ + root: { + boxShadow: "0 12px 18px 0 rgba(0,0,0,0.08)", + "&:focus": { + outline: "none", + boxShadow: "0 12px 18px 0 rgba(0,0,0,0.28)", + }, + }, +}); + +function Container(props) { + const { children, className, ...other } = props; + const classes = useStyles(); + return ( + + {children} + + ); +} + +Container.propTypes = { + /** + * Preview content. + */ + children: PropTypes.oneOfType([ + PropTypes.arrayOf(PropTypes.node), + PropTypes.node, + ]), + className: PropTypes.string, +}; + +export default Container; diff --git a/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js b/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js index 84ed450e..b89c3680 100644 --- a/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js +++ b/web/src/collection/components/FileMatchesPage/MatchPreview/MatchPreview.js @@ -2,7 +2,6 @@ import React, { useCallback } from "react"; import clsx from "clsx"; import PropTypes from "prop-types"; import { makeStyles } from "@material-ui/styles"; -import Paper from "@material-ui/core/Paper"; import VideocamOutlinedIcon from "@material-ui/icons/VideocamOutlined"; import Marked from "../../../../common/components/Marked"; import IconButton from "@material-ui/core/IconButton"; @@ -14,17 +13,13 @@ import ButtonBase from "@material-ui/core/ButtonBase"; import { useHistory } from "react-router-dom"; import { routes } from "../../../../routing/routes"; import 
FileType from "../../FileBrowserPage/FileType"; +import Container from "./Container"; const useStyles = makeStyles((theme) => ({ root: { - boxShadow: "0 12px 18px 0 rgba(0,0,0,0.08)", display: "flex", flexDirection: "column", alignItems: "stretch", - "&:focus": { - outline: "none", - boxShadow: "0 12px 18px 0 rgba(0,0,0,0.28)", - }, }, nameContainer: { display: "flex", @@ -114,7 +109,7 @@ function MatchPreview(props) { ); return ( -
- + ); } @@ -171,4 +166,9 @@ MatchPreview.propTypes = { className: PropTypes.string, }; +/** + * Preview container component + */ +MatchPreview.Container = Container; + export default MatchPreview; diff --git a/web/src/collection/state/reducers.js b/web/src/collection/state/reducers.js index a2d80073..fdc5e50a 100644 --- a/web/src/collection/state/reducers.js +++ b/web/src/collection/state/reducers.js @@ -50,12 +50,15 @@ export const initialState = { */ fileMatches: { fileId: undefined, - filters: {}, - total: 0, + filters: { + hops: 1, + minDistance: 0.0, + maxDistance: 1.0, + }, + total: undefined, error: false, loading: false, limit: 100, - offset: 0, matches: [], files: {}, }, @@ -105,10 +108,12 @@ function fileMatchesReducer(state = initialState.fileMatches, action) { case ACTION_UPDATE_FILE_MATCH_FILTERS: return { ...state, + fileId: action.fileId, filters: { ...state.filters, ...action.filters, fileId: action.fileId }, matches: [], files: {}, loading: true, + total: undefined, }; case ACTION_UPDATE_FILE_MATCH_FILTERS_SUCCESS: return { @@ -121,6 +126,7 @@ function fileMatchesReducer(state = initialState.fileMatches, action) { }; case ACTION_UPDATE_FILE_MATCH_FILTERS_FAILURE: return { + ...state, matches: [], files: {}, total: 0, diff --git a/web/src/collection/state/sagas.js b/web/src/collection/state/sagas.js index 3aed104b..8e91cb05 100644 --- a/web/src/collection/state/sagas.js +++ b/web/src/collection/state/sagas.js @@ -1,8 +1,11 @@ import { call, put, select, takeLatest } from "redux-saga/effects"; import { + ACTION_FETCH_FILE_MATCHES, ACTION_FETCH_FILES, ACTION_UPDATE_FILE_MATCH_FILTERS, ACTION_UPDATE_FILTERS, + fetchFileMatchesFailure, + fetchFileMatchesSuccess, fetchFilesFailure, fetchFilesSuccess, updateFileMatchFiltersFailure, @@ -12,31 +15,52 @@ import { } from "./actions"; import { selectColl, selectFileMatches } from "./selectors"; -function* fetchFileMatchesSaga(server, action) { +function* updateFileMatchFiltersSaga(server, action) { + yield* 
fetchFileMatchesSaga( + server, + action, + updateFileMatchFiltersSuccess, + updateFileMatchFiltersFailure + ); +} + +function* fetchFileMatchesPageSaga(server, action) { + yield* fetchFileMatchesSaga( + server, + action, + fetchFileMatchesSuccess, + fetchFileMatchesFailure + ); +} + +function* fetchFileMatchesSaga(server, action, success, failure) { try { // Determine current query params - const { limit, offset } = yield select(selectFileMatches); + const { limit, filters, fileId, matches: current } = yield select( + selectFileMatches + ); // Send request to the server const resp = yield call([server, server.fetchFileMatches], { limit, - offset, - id: action.fileId, + offset: current.length, + id: fileId, + filters, }); // Handle error if (resp.failure) { console.error("Fetch file matches error", resp.error); - yield put(updateFileMatchFiltersFailure(resp.error)); + yield put(failure(resp.error)); return; } // Update state const { total, matches, files } = resp.data; - yield put(updateFileMatchFiltersSuccess(matches, files, total)); + yield put(success(matches, files, total)); } catch (error) { console.error(error); - yield put(updateFileMatchFiltersFailure(error)); + yield put(failure(error)); } } @@ -98,7 +122,8 @@ export default function* collRootSaga(server) { ); yield takeLatest( ACTION_UPDATE_FILE_MATCH_FILTERS, - fetchFileMatchesSaga, + updateFileMatchFiltersSaga, server ); + yield takeLatest(ACTION_FETCH_FILE_MATCHES, fetchFileMatchesPageSaga, server); } diff --git a/web/src/i18n/locales/default.en-US.json b/web/src/i18n/locales/default.en-US.json index 96df34bc..fd0f59b8 100644 --- a/web/src/i18n/locales/default.en-US.json +++ b/web/src/i18n/locales/default.en-US.json @@ -170,6 +170,7 @@ "filter.defaultMinDate": "minimal date", "filter.creationDate": "Creation date (mm/dd/yyyy)", "filter.creationDate.help": "Based on file creation date.", - "preview.notAvailable": "Preview not available." 
+ "preview.notAvailable": "Preview not available.", + "match.load.error": "Error loading matches." } } From bc8d97d464a6fbff78eed7ff411edfa2839c156c Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 23 Oct 2020 21:08:49 +0700 Subject: [PATCH 25/34] Fix loading trigger message --- web/src/common/components/LoadingTrigger/LoadTrigger.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/src/common/components/LoadingTrigger/LoadTrigger.js b/web/src/common/components/LoadingTrigger/LoadTrigger.js index c361e180..6e6d35d8 100644 --- a/web/src/common/components/LoadingTrigger/LoadTrigger.js +++ b/web/src/common/components/LoadingTrigger/LoadTrigger.js @@ -79,8 +79,8 @@ function LoadTrigger(props) { )} {loading && } {!loading && error && ( -
- {messages.error} +
+ {errorMessage}
{messages.retry}
From 4e3a8f899a5ae5f6689e064449678c2893f63cbf Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 23 Oct 2020 21:09:26 +0700 Subject: [PATCH 26/34] Fix match reducers --- web/src/collection/state/reducers.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/src/collection/state/reducers.js b/web/src/collection/state/reducers.js index fdc5e50a..06dd76bc 100644 --- a/web/src/collection/state/reducers.js +++ b/web/src/collection/state/reducers.js @@ -129,7 +129,7 @@ function fileMatchesReducer(state = initialState.fileMatches, action) { ...state, matches: [], files: {}, - total: 0, + total: undefined, error: true, loading: false, }; @@ -149,6 +149,7 @@ function fileMatchesReducer(state = initialState.fileMatches, action) { }; case ACTION_FETCH_FILE_MATCHES_FAILURE: return { + ...state, error: true, loading: false, }; From caa736150f0e5823581369c018136815dc3cf9de Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 23 Oct 2020 21:09:48 +0700 Subject: [PATCH 27/34] Improve dynamic match loading --- .../components/FileMatchesPage/FileMatchesPage.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/web/src/collection/components/FileMatchesPage/FileMatchesPage.js b/web/src/collection/components/FileMatchesPage/FileMatchesPage.js index e25034df..31468e9f 100644 --- a/web/src/collection/components/FileMatchesPage/FileMatchesPage.js +++ b/web/src/collection/components/FileMatchesPage/FileMatchesPage.js @@ -90,13 +90,13 @@ function FileMatchesPage(props) { const files = matchesState.files; const dispatch = useDispatch(); - useEffect(() => { - dispatch(updateFileMatchFilters(id, { hops: 1 })); - }, [id]); - const handleLoad = useCallback(() => { - dispatch(fetchFileMatches()); - }, [matchesState]); + if (matchesState.total == null) { + dispatch(updateFileMatchFilters(id, { hops: 1 })); + } else { + dispatch(fetchFileMatches()); + } + }, [id, matchesState]); if (file == null) { return ( From 
5ffbc5a599aa600ea387e94ad52d842f6d716abe Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 23 Oct 2020 22:15:01 +0700 Subject: [PATCH 28/34] Implement neighbor loading --- .../FileClusterPage/FileClusterPage.js | 77 ++++++++++++++-- .../components/FileClusterPage/Loading.js | 90 +++++++++++++++++++ web/src/collection/state/reducers.js | 2 + .../components/LoadingTrigger/LoadTrigger.js | 8 +- 4 files changed, 166 insertions(+), 11 deletions(-) create mode 100644 web/src/collection/components/FileClusterPage/Loading.js diff --git a/web/src/collection/components/FileClusterPage/FileClusterPage.js b/web/src/collection/components/FileClusterPage/FileClusterPage.js index 79fd3383..6a61ad36 100644 --- a/web/src/collection/components/FileClusterPage/FileClusterPage.js +++ b/web/src/collection/components/FileClusterPage/FileClusterPage.js @@ -1,4 +1,4 @@ -import React, { useEffect } from "react"; +import React, { useCallback, useEffect } from "react"; import clsx from "clsx"; import PropTypes from "prop-types"; import { makeStyles } from "@material-ui/styles"; @@ -9,8 +9,10 @@ import useFile from "../../hooks/useFile"; import FileLoadingHeader from "../FileLoadingHeader"; import { useDispatch, useSelector } from "react-redux"; import { selectFileMatches } from "../../state/selectors"; -import { updateFileMatchFilters } from "../../state/actions"; +import { fetchFileMatches, updateFileMatchFilters } from "../../state/actions"; import MatchGraph from "../MatchGraph"; +import { useIntl } from "react-intl"; +import Loading from "./Loading"; const useStyles = makeStyles((theme) => ({ root: { @@ -27,45 +29,106 @@ const useStyles = makeStyles((theme) => ({ graph: { margin: theme.spacing(4), }, + loading: { + minHeight: 500, + }, })); +/** + * Get i18n text + */ +function useMessages() { + const intl = useIntl(); + return { + loadError: intl.formatMessage({ id: "match.load.error" }), + }; +} + function FileClusterPage(props) { const { className } = props; const classes = 
useStyles(); const { id } = useParams(); + const messages = useMessages(); const { file, error, loadFile } = useFile(id); const matchesState = useSelector(selectFileMatches); const matches = matchesState.matches; const files = matchesState.files; const dispatch = useDispatch(); + const hasMore = !(matches.length >= matchesState.total); useEffect(() => { dispatch(updateFileMatchFilters(id, { hops: 2 })); }, [id]); + useEffect(() => { + if ( + matchesState.loading || + matchesState.error || + matches.length >= matchesState.total + ) { + return; + } + dispatch(fetchFileMatches()); + }, [matchesState]); + + const handleRetry = useCallback(() => { + if (matchesState.total == null) { + dispatch(updateFileMatchFilters(id, { hops: 2 })); + } else { + dispatch(fetchFileMatches()); + } + }, [matchesState]); + + const handleLoadFile = useCallback(() => { + loadFile(); + handleRetry(); + }, [handleRetry, loadFile]); + if (file == null) { return (
); } - return ( -
- - + let content; + if (hasMore) { + const progress = + matchesState.total == null + ? undefined + : matches.length / matchesState.total; + + content = ( + + ); + } else { + content = ( + ); + } + + return ( +
+ + + {content}
); } diff --git a/web/src/collection/components/FileClusterPage/Loading.js b/web/src/collection/components/FileClusterPage/Loading.js new file mode 100644 index 00000000..fad3b281 --- /dev/null +++ b/web/src/collection/components/FileClusterPage/Loading.js @@ -0,0 +1,90 @@ +import React from "react"; +import clsx from "clsx"; +import PropTypes from "prop-types"; +import { makeStyles } from "@material-ui/styles"; +import CircularProgress from "@material-ui/core/CircularProgress"; +import { useIntl } from "react-intl"; + +const useStyles = makeStyles((theme) => ({ + root: { + display: "flex", + justifyContent: "center", + alignItems: "center", + }, + errorMessage: { + display: "flex", + alignItems: "center", + justifyContent: "center", + flexDirection: "column", + ...theme.mixins.title4, + }, + retryLink: { + color: theme.palette.primary.main, + cursor: "pointer", + paddingTop: theme.spacing(1), + }, +})); + +/** + * Get i18n text. + */ +function useMessages() { + const intl = useIntl(); + return { + retry: intl.formatMessage({ id: "actions.retry" }), + }; +} + +/** + * Interactive loading indicator. + */ +function Loading(props) { + const { error, errorMessage, onRetry, progress, className } = props; + const classes = useStyles(); + const messages = useMessages(); + const variant = progress == null ? "indeterminate" : "determinate"; + + return ( +
+ {!error && ( + + )} + {error && ( +
+ {errorMessage} +
+ {messages.retry} +
+
+ )} +
+ ); +} + +Loading.propTypes = { + /** + * Indicate loading error + */ + error: PropTypes.bool, + /** + * The value of the progress indicator for the determinate and static variants. + * Value between 0 and 1. + */ + progress: PropTypes.number, + /** + * Retry loading after a failed attempt + */ + onRetry: PropTypes.func.isRequired, + /** + * Message displayed when error=true + */ + errorMessage: PropTypes.string.isRequired, + className: PropTypes.string, +}; + +export default Loading; diff --git a/web/src/collection/state/reducers.js b/web/src/collection/state/reducers.js index 06dd76bc..3a304364 100644 --- a/web/src/collection/state/reducers.js +++ b/web/src/collection/state/reducers.js @@ -113,6 +113,7 @@ function fileMatchesReducer(state = initialState.fileMatches, action) { matches: [], files: {}, loading: true, + error: false, total: undefined, }; case ACTION_UPDATE_FILE_MATCH_FILTERS_SUCCESS: @@ -136,6 +137,7 @@ function fileMatchesReducer(state = initialState.fileMatches, action) { case ACTION_FETCH_FILE_MATCHES: return { ...state, + error: false, loading: true, }; case ACTION_FETCH_FILE_MATCHES_SUCCESS: diff --git a/web/src/common/components/LoadingTrigger/LoadTrigger.js b/web/src/common/components/LoadingTrigger/LoadTrigger.js index 6e6d35d8..9962dc93 100644 --- a/web/src/common/components/LoadingTrigger/LoadTrigger.js +++ b/web/src/common/components/LoadingTrigger/LoadTrigger.js @@ -41,7 +41,7 @@ function useMessages() { } /** - * Match loading trigger + * Loading trigger */ function LoadTrigger(props) { const { @@ -96,15 +96,15 @@ LoadTrigger.propTypes = { */ error: PropTypes.bool, /** - * File loading is in progress + * Loading is in progress */ loading: PropTypes.bool.isRequired, /** - * Trigger loading of the next portion of files + * Trigger loading of the next portion of items */ onLoad: PropTypes.func.isRequired, /** - * Whether more files could be loaded + * Whether more items could be loaded */ hasMore: PropTypes.bool.isRequired, /** 
From ec14b867bef5c281a02d4d8b30d751d885294292 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 23 Oct 2020 22:17:08 +0700 Subject: [PATCH 29/34] Adjust graph style --- web/src/collection/components/MatchGraph/D3Graph.js | 9 ++++++--- web/src/collection/components/MatchGraph/MatchGraph.js | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/web/src/collection/components/MatchGraph/D3Graph.js b/web/src/collection/components/MatchGraph/D3Graph.js index 8672e055..911dc643 100644 --- a/web/src/collection/components/MatchGraph/D3Graph.js +++ b/web/src/collection/components/MatchGraph/D3Graph.js @@ -45,7 +45,7 @@ export default class D3Graph { .selectAll("line") .data(this.links) .join("line") - .attr("stroke-width", (d) => Math.sqrt(d.value)); + .attr("stroke-width", (d) => Math.sqrt(100 * (1 - d.distance))); const node = svg .append("g") @@ -131,9 +131,12 @@ export default class D3Graph { .forceSimulation(this.nodes) .force( "link", - d3.forceLink(this.links).id((d) => d.id) + d3 + .forceLink(this.links) + .id((d) => d.id) + .strength((d) => 0.1 * (1 - d.distance)) ) - .force("charge", d3.forceManyBody()) + .force("charge", d3.forceManyBody().strength(-400)) .force("center", d3.forceCenter(this.width / 2, this.height / 2)); } diff --git a/web/src/collection/components/MatchGraph/MatchGraph.js b/web/src/collection/components/MatchGraph/MatchGraph.js index 62e374ad..ee57391f 100644 --- a/web/src/collection/components/MatchGraph/MatchGraph.js +++ b/web/src/collection/components/MatchGraph/MatchGraph.js @@ -23,7 +23,7 @@ function getLinks(source, matches) { return matches.map((match) => ({ source: match.source, target: match.target, - value: 10 * (1 - match.distance), + distance: match.distance, })); } @@ -35,6 +35,7 @@ function getNodes(source, files) { ...Object.values(files).map((file) => ({ id: file.id, group: file.id === source.id ? 
2 : 1, + file: file, })), ]; } From d63d90f9147bf490937ffa4f66bf538323407613 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 23 Oct 2020 22:42:33 +0700 Subject: [PATCH 30/34] Enable zooming --- .../components/FileMatchesPage/FileMatchesPage.js | 2 +- web/src/collection/components/MatchGraph/D3Graph.js | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/web/src/collection/components/FileMatchesPage/FileMatchesPage.js b/web/src/collection/components/FileMatchesPage/FileMatchesPage.js index 31468e9f..1ec0d165 100644 --- a/web/src/collection/components/FileMatchesPage/FileMatchesPage.js +++ b/web/src/collection/components/FileMatchesPage/FileMatchesPage.js @@ -1,4 +1,4 @@ -import React, { useCallback, useEffect, useState } from "react"; +import React, { useCallback, useState } from "react"; import clsx from "clsx"; import PropTypes from "prop-types"; import { makeStyles } from "@material-ui/styles"; diff --git a/web/src/collection/components/MatchGraph/D3Graph.js b/web/src/collection/components/MatchGraph/D3Graph.js index 911dc643..3bb55aa5 100644 --- a/web/src/collection/components/MatchGraph/D3Graph.js +++ b/web/src/collection/components/MatchGraph/D3Graph.js @@ -36,7 +36,13 @@ export default class D3Graph { .select(this.container) .attr("preserveAspectRatio", "xMidYMid meet") .attr("viewBox", [0, 0, this.width, this.height]) - .classed(this.classes.content, true); + .classed(this.classes.content, true) + .call( + d3.zoom().on("zoom", function (event) { + svg.attr("transform", event.transform); + }) + ) + .append("g"); const link = svg .append("g") From 20b6952be0f564339061ab80b989fcf554b88bf3 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 23 Oct 2020 22:56:08 +0700 Subject: [PATCH 31/34] Enable cluster navigation --- web/src/collection/components/MatchGraph/D3Graph.js | 9 +++++++-- .../collection/components/MatchGraph/MatchGraph.js | 12 +++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git 
a/web/src/collection/components/MatchGraph/D3Graph.js b/web/src/collection/components/MatchGraph/D3Graph.js index 3bb55aa5..bcb79930 100644 --- a/web/src/collection/components/MatchGraph/D3Graph.js +++ b/web/src/collection/components/MatchGraph/D3Graph.js @@ -10,7 +10,7 @@ function removeChildren(element) { } export default class D3Graph { - constructor({ links, nodes, container, classes = {} }) { + constructor({ links, nodes, container, classes = {}, onClick = () => {} }) { this.links = links.map(Object.create); this.nodes = nodes.map(Object.create); this.width = container?.clientWidth; @@ -19,6 +19,7 @@ export default class D3Graph { this.classes = classes; this.updateSize = null; this.simulation = null; + this.onClick = onClick; } /** @@ -62,7 +63,11 @@ export default class D3Graph { .join("circle") .attr("r", 15) .attr("fill", color) - .call(this._createDrag(this.simulation)); + .call(this._createDrag(this.simulation)) + .on("click", (_, data) => { + const node = this.nodes[data.index]; + this.onClick(node); + }); node.append("title").text((d) => d.id); diff --git a/web/src/collection/components/MatchGraph/MatchGraph.js b/web/src/collection/components/MatchGraph/MatchGraph.js index ee57391f..02ff9f1c 100644 --- a/web/src/collection/components/MatchGraph/MatchGraph.js +++ b/web/src/collection/components/MatchGraph/MatchGraph.js @@ -1,10 +1,12 @@ -import React, { useEffect, useRef, useState } from "react"; +import React, { useCallback, useEffect, useRef, useState } from "react"; import clsx from "clsx"; import PropTypes from "prop-types"; import { makeStyles } from "@material-ui/styles"; import D3Graph from "./D3Graph"; import MatchType from "../FileMatchesPage/MatchType"; import FileType from "../FileBrowserPage/FileType"; +import { useHistory } from "react-router-dom"; +import { routes } from "../../../routing/routes"; const useStyles = makeStyles((theme) => ({ root: { @@ -46,6 +48,13 @@ function MatchGraph(props) { const ref = useRef(null); const [graph, 
setGraph] = useState(null); + const history = useHistory(); + + const handleClickFile = useCallback( + (node) => history.push(routes.collection.fileURL(node.file.id)), + [] + ); + useEffect(() => { if (ref.current != null) { if (graph != null) { @@ -56,6 +65,7 @@ function MatchGraph(props) { nodes: getNodes(source, files), container: ref.current, classes: { content: classes.content }, + onClick: handleClickFile, }); newGraph.display(); setGraph(newGraph); From 1fec2c5841bcad5aec284c7d723d58cc19b8fbc4 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Sat, 24 Oct 2020 01:49:14 +0700 Subject: [PATCH 32/34] Fix tooltips --- .../components/MatchGraph/D3Graph.js | 163 +++++++++++++++++- .../components/MatchGraph/MatchGraph.js | 10 +- 2 files changed, 166 insertions(+), 7 deletions(-) diff --git a/web/src/collection/components/MatchGraph/D3Graph.js b/web/src/collection/components/MatchGraph/D3Graph.js index bcb79930..52c62d0b 100644 --- a/web/src/collection/components/MatchGraph/D3Graph.js +++ b/web/src/collection/components/MatchGraph/D3Graph.js @@ -1,4 +1,5 @@ import * as d3 from "d3"; +import { formatDuration } from "../../../common/helpers/format"; /** * Remove all element's children. @@ -9,8 +10,96 @@ function removeChildren(element) { } } +const defaultOptions = { + nodeRadius: 15, +}; + +function basename(filename) { + return filename.substring(filename.lastIndexOf("/") + 1); +} + +/** + * Class to calculate position relative to node. + */ +class NodeTracker { + constructor(node, indent) { + this.node = node; + this.indent = { x: 0, y: 0, ...indent }; + } + track() { + return [this.node.x + this.indent.x, this.node.y + this.indent.y]; + } +} + +/** + * Class to calculate position relative to edge. 
+ */ +class EdgeTracker { + constructor(edge, indent) { + this.edge = edge; + this.indent = { x: 0, y: 0, ...indent }; + } + track() { + const x = 0.5 * (this.edge.source.x + this.edge.target.x) + this.indent.x; + const y = 0.5 * (this.edge.source.y + this.edge.target.y) + this.indent.y; + return [x, y]; + } +} + +/** + * Class to calculate position relative to mouse pointer. + */ +class MouseTracker { + constructor(indent) { + this.indent = { x: 0, y: 0, ...indent }; + } + track(event, element) { + if (event == null) { + return [undefined, undefined]; + } + const [x, y] = d3.pointer(event, element); + return [x + this.indent.x, y + this.indent.y]; + } +} + +class Tooltip { + constructor({ text, container, tracker, className }) { + this.tracker = tracker; + + this.text = container.append("text").text(text); + } + + move(event) { + const [x, y] = this.tracker.track(event, this.text.node()); + if (x != null && y != null) { + this.text.attr("x", x).attr("y", y); + } + } + + remove() { + this.text.remove(); + } +} + +function edgeWidth(edge) { + return Math.sqrt(100 * (1 - edge.distance)); +} + +function fileTooltip(file) { + const filename = basename(file.filename); + const duration = formatDuration(file.metadata.length, null, false); + return `${filename} - ${duration}`; +} + export default class D3Graph { - constructor({ links, nodes, container, classes = {}, onClick = () => {} }) { + constructor({ + links, + nodes, + container, + classes = {}, + onClick = () => {}, + options = {}, + }) { this.links = links.map(Object.create); this.nodes = nodes.map(Object.create); this.width = container?.clientWidth; @@ -20,6 +109,11 @@ export default class D3Graph { this.updateSize = null; this.simulation = null; this.onClick = onClick; + this.options = { + ...defaultOptions, + ...options, + }; + this._tooltip = null; } /** @@ -45,6 +139,9 @@ export default class D3Graph { ) .append("g"); + // Bind this for legacy context handling + const self = this; + const link = svg 
.append("g") .attr("stroke", "#999") @@ -52,24 +149,63 @@ export default class D3Graph { .selectAll("line") .data(this.links) .join("line") - .attr("stroke-width", (d) => Math.sqrt(100 * (1 - d.distance))); + .attr("stroke-width", (d) => edgeWidth(d)) + .on("mouseover", function (event, edge) { + d3.select(this).attr("stroke-width", (d) => 1.5 * edgeWidth(d)); + self.tooltip = new Tooltip({ + text: edge.distance.toFixed(4), + container: svg, + tracker: new MouseTracker({ x: 15 }), + }); + self.tooltip.move(event); + }) + .on("mouseout", function () { + d3.select(this).attr("stroke-width", (d) => edgeWidth(d)); + self.tooltip = null; + }) + .style("cursor", "pointer"); const node = svg .append("g") - .attr("stroke", "#fff") + .attr("stroke", "rgba(0,0,0,0)") .attr("stroke-width", 1.5) .selectAll("circle") .data(this.nodes) .join("circle") - .attr("r", 15) + .attr("r", this.options.nodeRadius) .attr("fill", color) .call(this._createDrag(this.simulation)) .on("click", (_, data) => { const node = this.nodes[data.index]; this.onClick(node); - }); + }) + .on("mouseover", function (event, node) { + d3.select(this).attr("r", self.options.nodeRadius * 1.5); + + self.tooltip = new Tooltip({ + text: fileTooltip(node.file), + container: svg, + tracker: new NodeTracker(node, { + x: self.options.nodeRadius * 2, + y: self.options.nodeRadius * 0.25, + }), + className: self.classes.tooltip, + }); + self.tooltip.move(event); + }) + .on("mouseout", function () { + self.tooltip = null; + d3.select(this).attr("r", self.options.nodeRadius); + }) + .style("cursor", "pointer"); - node.append("title").text((d) => d.id); + // node.append("title").text((data) => { + // const filename = basename(data.file.filename); + // const duration = formatDuration(data.file.metadata.length, null, false); + // return `${filename} ${duration}`; + // }); + // + // link.append("title").text((data) => data.distance.toFixed(4)); this.simulation.on("tick", () => { link @@ -79,6 +215,7 @@ export default class 
D3Graph { .attr("y2", (d) => d.target.y); node.attr("cx", (d) => d.x).attr("cy", (d) => d.y); + this.tooltip?.move(); }); this.updateSize = () => { @@ -110,11 +247,13 @@ export default class D3Graph { } event.subject.fx = event.subject.x; event.subject.fy = event.subject.y; + this.tooltip?.move(event); }; const dragged = (event) => { event.subject.fx = event.x; event.subject.fy = event.y; + this.tooltip?.move(event); }; const dragEnded = (event) => { @@ -123,6 +262,7 @@ export default class D3Graph { } event.subject.fx = null; event.subject.fy = null; + this.tooltip?.move(event); }; return d3 @@ -151,6 +291,17 @@ export default class D3Graph { .force("center", d3.forceCenter(this.width / 2, this.height / 2)); } + get tooltip() { + return this._tooltip; + } + + set tooltip(tooltip) { + if (this._tooltip != null) { + this._tooltip.remove(); + } + this._tooltip = tooltip; + } + /** * Remove graph elements, remove all listeners, clear container. */ diff --git a/web/src/collection/components/MatchGraph/MatchGraph.js b/web/src/collection/components/MatchGraph/MatchGraph.js index 02ff9f1c..1201508d 100644 --- a/web/src/collection/components/MatchGraph/MatchGraph.js +++ b/web/src/collection/components/MatchGraph/MatchGraph.js @@ -16,6 +16,14 @@ const useStyles = makeStyles((theme) => ({ width: "100%", minHeight: 500, }, + tooltip: { + position: "absolute", + textAlign: "center", + padding: theme.spacing(2), + backgroundColor: theme.palette.common.white, + borderRadius: theme.spacing(2), + boxShadow: "0 12px 18px 0 rgba(0,0,0,0.08)", + }, })); /** @@ -64,7 +72,7 @@ function MatchGraph(props) { links: getLinks(source, matches), nodes: getNodes(source, files), container: ref.current, - classes: { content: classes.content }, + classes: { content: classes.content, tooltip: classes.tooltip }, onClick: handleClickFile, }); newGraph.display(); From a69700fdd05fefb91a300294280a66b27e125050 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Sat, 24 Oct 2020 01:53:14 +0700 Subject: 
[PATCH 33/34] Make edge opacity dynamic --- web/src/collection/components/MatchGraph/D3Graph.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/collection/components/MatchGraph/D3Graph.js b/web/src/collection/components/MatchGraph/D3Graph.js index 52c62d0b..caf1b112 100644 --- a/web/src/collection/components/MatchGraph/D3Graph.js +++ b/web/src/collection/components/MatchGraph/D3Graph.js @@ -145,10 +145,10 @@ export default class D3Graph { const link = svg .append("g") .attr("stroke", "#999") - .attr("stroke-opacity", 0.6) .selectAll("line") .data(this.links) .join("line") + .attr("stroke-opacity", (d) => 1 - d.distance) .attr("stroke-width", (d) => edgeWidth(d)) .on("mouseover", function (event, edge) { d3.select(this).attr("stroke-width", (d) => 1.5 * edgeWidth(d)); From d03df19106e32257c2241ba40b4eb90df053c544 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Sat, 24 Oct 2020 02:25:54 +0700 Subject: [PATCH 34/34] Make color scheme static --- .../components/MatchGraph/D3Graph.js | 27 ++++++------------- .../components/MatchGraph/MatchGraph.js | 22 ++++++++++++--- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/web/src/collection/components/MatchGraph/D3Graph.js b/web/src/collection/components/MatchGraph/D3Graph.js index caf1b112..d2dc637e 100644 --- a/web/src/collection/components/MatchGraph/D3Graph.js +++ b/web/src/collection/components/MatchGraph/D3Graph.js @@ -31,21 +31,6 @@ class NodeTracker { } } -/** - * Class to calculate position relative to edge. - */ -class EdgeTracker { - constructor(edge, indent) { - this.edge = edge; - this.indent = { x: 0, y: 0, ...indent }; - } - track() { - const x = 0.5 * (this.edge.source.x + this.edge.target.x) + this.indent.x; - const y = 0.5 * (this.edge.source.y + this.edge.target.y) + this.indent.y; - return [x, y]; - } -} - /** * Class to calculate position relative to mouse pointer. 
*/ @@ -63,9 +48,8 @@ class MouseTracker { } class Tooltip { - constructor({ text, container, tracker, className }) { + constructor({ text, container, tracker }) { this.tracker = tracker; - this.text = container.append("text").text(text); } @@ -91,6 +75,12 @@ function fileTooltip(file) { return `${filename} - ${duration}`; } +const colorScheme = { + 1: "#2ca02c", + 2: "#1f77b4", + 3: "#ff7f0e", +}; + export default class D3Graph { constructor({ links, @@ -121,7 +111,7 @@ export default class D3Graph { */ display() { const scale = d3.scaleOrdinal(d3.schemeCategory10); - const color = (d) => scale(d.group); + const color = (d) => colorScheme[d.group] || scale(d.group); this.simulation = this._createForceSimulation(); @@ -189,7 +179,6 @@ export default class D3Graph { x: self.options.nodeRadius * 2, y: self.options.nodeRadius * 0.25, }), - className: self.classes.tooltip, }); self.tooltip.move(event); }) diff --git a/web/src/collection/components/MatchGraph/MatchGraph.js b/web/src/collection/components/MatchGraph/MatchGraph.js index 1201508d..dde0df67 100644 --- a/web/src/collection/components/MatchGraph/MatchGraph.js +++ b/web/src/collection/components/MatchGraph/MatchGraph.js @@ -40,11 +40,27 @@ function getLinks(source, matches) { /** * Get collection of nodes compatible with D3Graph */ -function getNodes(source, files) { +function getNodes(source, files, matches) { + const children = new Set(); + for (const match of matches) { + if (match.source === source.id) { + children.add(match.target); + } else if (match.target === source.id) { + children.add(match.source); + } + } + const group = (file) => { + if (file.id === source.id) { + return 1; + } else if (children.has(file.id)) { + return 2; + } + return 3; + }; return [ ...Object.values(files).map((file) => ({ id: file.id, - group: file.id === source.id ? 
2 : 1, + group: group(file), file: file, })), ]; @@ -70,7 +86,7 @@ function MatchGraph(props) { } const newGraph = new D3Graph({ links: getLinks(source, matches), - nodes: getNodes(source, files), + nodes: getNodes(source, files, matches), container: ref.current, classes: { content: classes.content, tooltip: classes.tooltip }, onClick: handleClickFile,