diff --git a/.github/workflows/schedule.yml b/.github/workflows/schedule.yml index ed0f9fbc3120..097c1b5cc034 100644 --- a/.github/workflows/schedule.yml +++ b/.github/workflows/schedule.yml @@ -120,9 +120,6 @@ jobs: with: python-version: '3.8' - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - name: Getting CVAT UI cache from the default branch uses: actions/cache@v3 with: @@ -155,9 +152,9 @@ jobs: - name: Pull CVAT server image run: | docker pull ${{ steps.meta-server.outputs.tags }} - docker tag ${{ steps.meta-server.outputs.tags }} cvat/server:dev + docker tag ${{ steps.meta-server.outputs.tags }} cvat/server:local docker tag ${{ steps.meta-server.outputs.tags }} cvat/server:latest - docker tag cvat/ui:latest cvat/ui:dev + docker tag cvat/ui:latest cvat/ui:local - name: OPA tests run: | @@ -218,9 +215,6 @@ jobs: with: node-version: '16.x' - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - name: Login to Docker Hub uses: docker/login-action@v2 with: @@ -349,9 +343,6 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - name: CVAT server. Extract metadata (tags, labels) for Docker id: meta-server uses: docker/metadata-action@master @@ -369,7 +360,7 @@ jobs: - name: Pull CVAT server image run: | docker pull ${{ steps.meta-server.outputs.tags }} - docker tag ${{ steps.meta-server.outputs.tags }} cvat/server:dev + docker tag ${{ steps.meta-server.outputs.tags }} cvat/server:local - name: Downloading coverage results uses: actions/download-artifact@v2 diff --git a/.remarkignore b/.remarkignore index 4f1b9bbf8425..00bd38c6f1ee 100644 --- a/.remarkignore +++ b/.remarkignore @@ -1,2 +1,4 @@ cvat-sdk/docs/ cvat-sdk/README.md +.env/ +site/themes/ diff --git a/.vscode/launch.json b/.vscode/launch.json index 0b02a0110cad..884ed02c3b6f 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -270,6 +270,28 @@ "env": {}, "console": "internalConsole" }, + { + "name": "server: RQ - cleaning", + "type": "python", + "request": "launch", + "stopOnEntry": false, + "justMyCode": false, + "python": "${command:python.interpreterPath}", + "program": "${workspaceRoot}/manage.py", + "args": [ + "rqworker", + "cleaning", + "--worker-class", + "cvat.rqworker.SimpleWorker" + ], + "django": true, + "cwd": "${workspaceFolder}", + "env": { + "DJANGO_LOG_SERVER_HOST": "localhost", + "DJANGO_LOG_SERVER_PORT": "8282" + }, + "console": "internalConsole" + }, { "name": "server: migrate", "type": "python", @@ -433,6 +455,7 @@ "server: RQ - annotation", "server: RQ - webhooks", "server: RQ - scheduler", + "server: RQ - cleaning", "server: git", ] } diff --git a/CHANGELOG.md b/CHANGELOG.md index 6586ae7d6608..e7edc2f73ddd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## \[2.4.6] - 2023-06-09 +### Added +- \[Server API\] An option to supply custom file ordering for task data uploads () +- New option ``semi-auto`` is available as an annotation source () + +### Changed +- Allowed using a dataset manifest with the `predefined` sorting method for task data () +- Replaced Apache mod_wsgi with the Uvicorn ASGI server for the backend () + +### Fixed +- Incorrect location of temporary file during job annotation import () +- Deletion of uploaded file along with annotations/backups when an RQ job + has been initiated, but no subsequent status check requests have been made () +- Deletion of uploaded files, including annotations and backups, + after they have been uploaded to the server using the TUS protocol but before an RQ job has been initiated () +- Simultaneous creation of tasks or projects with identical names from backups by multiple users () +- \[Server API\] The `predefined` sorting method for task data uploads () +- Allowed slashes in export filenames () + ## \[2.4.5] - 2023-06-02 ### Added - Integrated support for sharepoint and cloud storage files, along with diff --git a/Dockerfile b/Dockerfile index 5cb6e7295a9a..974aab6eb888 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,6 @@ FROM ${BASE_IMAGE} as build-image-base RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get --no-install-recommends install -yq \ - apache2-dev \ curl \ g++ \ gcc \ @@ -42,8 +41,8 @@ RUN curl -sL https://github.com/cisco/openh264/archive/v${OPENH264_VERSION}.tar. make -j5 && make install-shared PREFIX=${PREFIX} && make clean WORKDIR /tmp/ffmpeg -RUN curl -sL https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.bz2 --output - | \ - tar -jx --strip-components=1 && \ +RUN curl -sL https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.gz --output - | \ + tar -zx --strip-components=1 && \ ./configure --disable-nonfree --disable-gpl --enable-libopenh264 \ --enable-shared --disable-static --disable-doc --disable-programs --prefix="${PREFIX}" && \ make -j5 && make install && make clean @@ -102,19 +101,18 @@ ENV DJANGO_CONFIGURATION=${DJANGO_CONFIGURATION} # Install necessary apt packages RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get --no-install-recommends install -yq \ - apache2 \ bzip2 \ ca-certificates \ curl \ git \ git-lfs \ - libapache2-mod-xsendfile \ libgeos-c1v5 \ libgl1 \ libgomp1 \ libldap-2.5-0 \ libpython3.10 \ libsasl2-2 \ + nginx \ p7zip-full \ poppler-utils \ python3 \ @@ -172,10 +170,11 @@ RUN if [ "${CVAT_DEBUG_ENABLED}" = 'yes' ]; then \ fi # Install and initialize CVAT, copy all necessary files +COPY cvat/nginx.conf /etc/nginx/nginx.conf COPY --chown=${USER} components /tmp/components COPY --chown=${USER} supervisord/ ${HOME}/supervisord COPY --chown=${USER} ssh ${HOME}/.ssh -COPY --chown=${USER} mod_wsgi.conf wait-for-it.sh manage.py ${HOME}/ +COPY --chown=${USER} wait-for-it.sh manage.py backend_entrypoint.sh ${HOME}/ COPY --chown=${USER} utils/ ${HOME}/utils COPY --chown=${USER} cvat/ ${HOME}/cvat diff --git a/backend_entrypoint.sh b/backend_entrypoint.sh new file mode 100755 index 000000000000..dc7478365b2f --- /dev/null +++ b/backend_entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +set -e + +${HOME}/wait-for-it.sh ${CVAT_POSTGRES_HOST}:5432 -t 0 +python3 ${HOME}/manage.py migrate +python3 ${HOME}/manage.py collectstatic --no-input +exec /usr/bin/supervisord -c supervisord/server.conf diff --git a/cvat-canvas/package.json b/cvat-canvas/package.json index 20c28f199d41..d7791a53f322 100644 --- a/cvat-canvas/package.json
+++ b/cvat-canvas/package.json @@ -1,6 +1,6 @@ { "name": "cvat-canvas", - "version": "2.16.6", + "version": "2.16.7", "description": "Part of Computer Vision Annotation Tool which presents its canvas library", "main": "src/canvas.ts", "scripts": { diff --git a/cvat-canvas/src/typescript/shared.ts b/cvat-canvas/src/typescript/shared.ts index b288d56d2a7a..75d69be5ac75 100644 --- a/cvat-canvas/src/typescript/shared.ts +++ b/cvat-canvas/src/typescript/shared.ts @@ -41,7 +41,7 @@ export interface DrawnState { occluded?: boolean; hidden?: boolean; lock: boolean; - source: 'AUTO' | 'MANUAL'; + source: 'AUTO' | 'SEMI-AUTO' | 'MANUAL'; shapeType: string; points?: number[]; rotation: number; diff --git a/cvat-core/package.json b/cvat-core/package.json index 932c8284b9c7..8f20feefc61f 100644 --- a/cvat-core/package.json +++ b/cvat-core/package.json @@ -1,6 +1,6 @@ { "name": "cvat-core", - "version": "9.1.1", + "version": "9.1.3", "description": "Part of Computer Vision Tool which presents an interface for client-side integration", "main": "src/api.ts", "scripts": { diff --git a/cvat-core/src/annotations-objects.ts b/cvat-core/src/annotations-objects.ts index 38df9bd9360b..4ff8d04e8503 100644 --- a/cvat-core/src/annotations-objects.ts +++ b/cvat-core/src/annotations-objects.ts @@ -31,6 +31,14 @@ function copyShape(state: TrackedShape, data: Partial = {}): Track }; } +function computeNewSource(currentSource: Source): Source { + if ([Source.AUTO, Source.SEMI_AUTO].includes(currentSource)) { + return Source.SEMI_AUTO; + } + + return Source.MANUAL; +} + export interface BasicInjection { labels: Record; groups: { max: number }; @@ -596,7 +604,7 @@ export class Shape extends Drawn { const undoRotation = this.rotation; const redoRotation = rotation; const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); this.history.do( HistoryActions.CHANGED_ROTATION, @@ -622,7 +630,7 @@ export class Shape extends Drawn { const undoPoints = this.points; const redoPoints = points; const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); this.history.do( HistoryActions.CHANGED_POINTS, @@ -648,7 +656,7 @@ export class Shape extends Drawn { const undoOccluded = this.occluded; const redoOccluded = occluded; const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); this.history.do( HistoryActions.CHANGED_OCCLUDED, @@ -674,7 +682,7 @@ export class Shape extends Drawn { const undoOutside = this.outside; const redoOutside = outside; const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); this.history.do( HistoryActions.CHANGED_OCCLUDED, @@ -700,7 +708,7 @@ export class Shape extends Drawn { const undoZOrder = this.zOrder; const redoZOrder = zOrder; const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? 
this.source : computeNewSource(this.source); this.history.do( HistoryActions.CHANGED_ZORDER, @@ -1179,7 +1187,7 @@ export class Track extends Drawn { protected saveRotation(rotation: number, frame: number): void { const wasKeyframe = frame in this.shapes; const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); const undoShape = wasKeyframe ? this.shapes[frame] : undefined; const redoShape = wasKeyframe ? { ...this.shapes[frame], rotation } : copyShape(this.get(frame), { rotation }); @@ -1199,7 +1207,7 @@ export class Track extends Drawn { protected savePoints(points: number[], frame: number): void { const wasKeyframe = frame in this.shapes; const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); const undoShape = wasKeyframe ? this.shapes[frame] : undefined; const redoShape = wasKeyframe ? { ...this.shapes[frame], points } : copyShape(this.get(frame), { points }); @@ -1219,7 +1227,7 @@ export class Track extends Drawn { protected saveOutside(frame: number, outside: boolean): void { const wasKeyframe = frame in this.shapes; const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); const undoShape = wasKeyframe ? this.shapes[frame] : undefined; const redoShape = wasKeyframe ? { ...this.shapes[frame], outside } : @@ -1240,7 +1248,7 @@ export class Track extends Drawn { protected saveOccluded(occluded: boolean, frame: number): void { const wasKeyframe = frame in this.shapes; const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); const undoShape = wasKeyframe ? this.shapes[frame] : undefined; const redoShape = wasKeyframe ? { ...this.shapes[frame], occluded } : @@ -1261,7 +1269,7 @@ export class Track extends Drawn { protected saveZOrder(zOrder: number, frame: number): void { const wasKeyframe = frame in this.shapes; const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); const undoShape = wasKeyframe ? this.shapes[frame] : undefined; const redoShape = wasKeyframe ? { ...this.shapes[frame], zOrder } : @@ -1287,7 +1295,7 @@ export class Track extends Drawn { } const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); const undoShape = wasKeyframe ? this.shapes[frame] : undefined; const redoShape = keyframe ? copyShape(this.get(frame)) : undefined; @@ -2027,7 +2035,7 @@ export class SkeletonShape extends Shape { protected saveRotation(rotation, frame): void { const undoSkeletonPoints = this.elements.map((element) => element.points); const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? 
this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); const bbox = computeWrappingBox(undoSkeletonPoints.flat()); const [cx, cy] = [bbox.x + bbox.width / 2, bbox.y + bbox.height / 2]; @@ -2075,7 +2083,7 @@ export class SkeletonShape extends Shape { const updateElements = (affectedElements, action, property: 'points' | 'occluded' | 'hidden' | 'lock') => { const undoSkeletonProperties = this.elements.map((element) => element[property]); const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); try { this.history.freeze(true); @@ -2246,7 +2254,7 @@ export class MaskShape extends Shape { const points = mask2Rle(maskPoints); const redoPoints = points; - const redoSource = Source.MANUAL; + const redoSource = computeNewSource(this.source); const undo = (): void => { this.points = undoPoints; @@ -2807,7 +2815,7 @@ export class SkeletonTrack extends Track { protected saveRotation(rotation: number, frame: number): void { const undoSkeletonShapes = this.elements.map((element) => element.shapes[frame]); const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); const elementsData = this.elements.map((element) => element.get(frame)); const skeletonPoints = elementsData.map((data) => data.points); @@ -2952,7 +2960,7 @@ export class SkeletonTrack extends Track { const undoSkeletonProperties = this.elements.map((element) => element[property] || null); const undoSkeletonShapes = this.elements.map((element) => element.shapes[frame]); const undoSource = this.source; - const redoSource = this.readOnlyFields.includes('source') ? this.source : Source.MANUAL; + const redoSource = this.readOnlyFields.includes('source') ? this.source : computeNewSource(this.source); const errors = []; try { diff --git a/cvat-core/src/enums.ts b/cvat-core/src/enums.ts index 2ee04197cbae..4c260bb98884 100644 --- a/cvat-core/src/enums.ts +++ b/cvat-core/src/enums.ts @@ -79,6 +79,7 @@ export enum ShapeType { export enum Source { MANUAL = 'manual', + SEMI_AUTO = 'semi-auto', AUTO = 'auto', } diff --git a/cvat-core/src/object-state.ts b/cvat-core/src/object-state.ts index 30646e417379..59eb89d159f3 100644 --- a/cvat-core/src/object-state.ts +++ b/cvat-core/src/object-state.ts @@ -454,7 +454,7 @@ export default class ObjectState { }), ); - if ([Source.MANUAL, Source.AUTO].includes(serialized.source)) { + if ([Source.MANUAL, Source.SEMI_AUTO, Source.AUTO].includes(serialized.source)) { data.source = serialized.source; } if (typeof serialized.zOrder === 'number') { diff --git a/cvat-core/src/server-proxy.ts b/cvat-core/src/server-proxy.ts index 1e7d4d205a23..ec692db17aad 100644 --- a/cvat-core/src/server-proxy.ts +++ b/cvat-core/src/server-proxy.ts @@ -731,7 +731,7 @@ function exportDataset(instanceType: 'projects' | 'jobs' | 'tasks') { const params: Params = { ...enableOrganization(), ...configureStorage(targetStorage, useDefaultSettings), - ...(name ? { filename: name.replace(/\//g, '_') } : {}), + ...(name ? 
{ filename: name } : {}), format, }; @@ -783,13 +783,14 @@ async function importDataset( }; const url = `${backendAPI}/projects/${id}/dataset`; + let rqId: string; async function wait() { return new Promise((resolve, reject) => { async function requestStatus() { try { const response = await Axios.get(url, { - params: { ...params, action: 'import_status' }, + params: { ...params, action: 'import_status', rq_id: rqId }, }); if (response.status === 202) { if (response.data.message) { @@ -812,10 +813,11 @@ async function importDataset( if (isCloudStorage) { try { - await Axios.post(url, + const response = await Axios.post(url, new FormData(), { params, }); + rqId = response.data.rq_id; } catch (errorData) { throw generateError(errorData); } @@ -837,11 +839,12 @@ async function importDataset( headers: { 'Upload-Start': true }, }); await chunkUpload(file, uploadConfig); - await Axios.post(url, + const response = await Axios.post(url, new FormData(), { params, headers: { 'Upload-Finish': true }, }); + rqId = response.data.rq_id; } catch (errorData) { throw generateError(errorData); } @@ -1617,6 +1620,7 @@ async function uploadAnnotations( filename: typeof file === 'string' ? file : file.name, conv_mask_to_poly: options.convMaskToPoly, }; + let rqId: string; const url = `${backendAPI}/${session}s/${id}/annotations`; async function wait() { @@ -1627,7 +1631,7 @@ async function uploadAnnotations( url, new FormData(), { - params, + params: { ...params, rq_id: rqId }, }, ); if (response.status === 202) { @@ -1646,10 +1650,11 @@ async function uploadAnnotations( if (isCloudStorage) { try { - await Axios.post(url, + const response = await Axios.post(url, new FormData(), { params, }); + rqId = response.data.rq_id; } catch (errorData) { throw generateError(errorData); } @@ -1667,11 +1672,12 @@ async function uploadAnnotations( headers: { 'Upload-Start': true }, }); await chunkUpload(file, uploadConfig); - await Axios.post(url, + const response = await Axios.post(url, new FormData(), { params, headers: { 'Upload-Finish': true }, }); + rqId = response.data.rq_id; } catch (errorData) { throw generateError(errorData); } diff --git a/cvat-sdk/cvat_sdk/core/proxies/tasks.py b/cvat-sdk/cvat_sdk/core/proxies/tasks.py index 74e2b5fbac2c..29e84dc3545d 100644 --- a/cvat-sdk/cvat_sdk/core/proxies/tasks.py +++ b/cvat-sdk/cvat_sdk/core/proxies/tasks.py @@ -450,6 +450,6 @@ def create_from_backup( ) task_id = json.loads(response.data)["id"] - self._client.logger.info(f"Task has been imported sucessfully. Task ID: {task_id}") + self._client.logger.info(f"Task has been imported successfully. 
Task ID: {task_id}") return self.retrieve(task_id) diff --git a/cvat-sdk/cvat_sdk/core/uploading.py b/cvat-sdk/cvat_sdk/core/uploading.py index ceacda782ce3..85815ea36286 100644 --- a/cvat-sdk/cvat_sdk/core/uploading.py +++ b/cvat-sdk/cvat_sdk/core/uploading.py @@ -1,9 +1,10 @@ -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT from __future__ import annotations +import json import os from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple @@ -89,6 +90,7 @@ def create_url(self): headers["upload-length"] = str(self.file_size) headers["upload-metadata"] = ",".join(self.encode_metadata()) resp = self._api_client.rest_client.POST(self.client.url, headers=headers) + self.real_filename = resp.headers.get("Upload-Filename") url = resp.headers.get("location") if url is None: msg = "Attempt to retrieve create file url with status {}".format(resp.status_code) @@ -179,9 +181,10 @@ def upload_file( assert meta["filename"] self._tus_start_upload(url, query_params=query_params) - self._upload_file_data_with_tus( + real_filename = self._upload_file_data_with_tus( url=url, filename=filename, meta=meta, pbar=pbar, logger=logger ) + query_params["filename"] = real_filename return self._tus_finish_upload(url, query_params=query_params, fields=fields) def _wait_for_completion( @@ -216,7 +219,9 @@ def _make_tus_uploader(api_client: ApiClient, url: str, **kwargs): return _MyTusUploader(client=client, api_client=api_client, **kwargs) - def _upload_file_data_with_tus(self, url, filename, *, meta=None, pbar=None, logger=None): + def _upload_file_data_with_tus( + self, url, filename, *, meta=None, pbar=None, logger=None + ) -> str: file_size = filename.stat().st_size if pbar is None: pbar = NullProgressReporter() @@ -233,6 +238,7 @@ def _upload_file_data_with_tus(self, url, filename, *, meta=None, pbar=None, log log_func=logger, ) tus_uploader.upload() + return tus_uploader.real_filename def _tus_start_upload(self, url, *, query_params=None): response = self._client.api_client.rest_client.POST( @@ -273,17 +279,21 @@ def upload_file_and_wait( ): url = self._client.api_map.make_endpoint_url(endpoint.path, kwsub=url_params) params = {"format": format_name, "filename": filename.name} - self.upload_file( + response = self.upload_file( url, filename, pbar=pbar, query_params=params, meta={"filename": params["filename"]} ) + rq_id = json.loads(response.data).get("rq_id") + assert rq_id, "The rq_id was not found in the response" + params["rq_id"] = rq_id + self._wait_for_completion( url, success_status=201, positive_statuses=[202], status_check_period=status_check_period, query_params=params, - method="POST", + method="PUT", ) @@ -301,12 +311,17 @@ def upload_file_and_wait( ): url = self._client.api_map.make_endpoint_url(upload_endpoint.path, kwsub=url_params) params = {"format": format_name, "filename": filename.name} - self.upload_file( + response = self.upload_file( url, filename, pbar=pbar, query_params=params, meta={"filename": params["filename"]} ) + rq_id = json.loads(response.data).get("rq_id") + assert rq_id, "The rq_id was not found in the response" url = self._client.api_map.make_endpoint_url(retrieve_endpoint.path, kwsub=url_params) - params = {"action": "import_status"} + params = { + "action": "import_status", + "rq_id": rq_id, + } self._wait_for_completion( url, success_status=201, @@ -335,6 +350,10 @@ def upload_files( if pbar is not None: pbar.start(total_size, desc="Uploading data") + if 
str(kwargs.get("sorting_method")).lower() == "predefined": + # Request file ordering, because we reorder files to send more efficiently + kwargs.setdefault("upload_file_order", [p.name for p in resources]) + self._tus_start_upload(url) for group, group_size in bulk_file_groups: @@ -359,7 +378,6 @@ def upload_files( pbar.advance(group_size) for filename in separate_files: - # TODO: check if basename produces invalid paths here, can lead to overwriting self._upload_file_data_with_tus( url, filename, diff --git a/cvat-ui/package.json b/cvat-ui/package.json index 9e4994a994b7..c9dac14e6041 100644 --- a/cvat-ui/package.json +++ b/cvat-ui/package.json @@ -1,6 +1,6 @@ { "name": "cvat-ui", - "version": "1.51.1", + "version": "1.51.2", "description": "CVAT single-page application", "main": "src/index.tsx", "scripts": { diff --git a/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/opencv-control.tsx b/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/opencv-control.tsx index 76c60e92af24..eaa478dc8ea4 100644 --- a/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/opencv-control.tsx +++ b/cvat-ui/src/components/annotation-page/standard-workspace/controls-side-bar/opencv-control.tsx @@ -278,6 +278,7 @@ class OpenCVControlComponent extends React.PureComponent label.id === activeLabelID)[0], points: openCVWrapper.contours .approxPoly(finalPoints, thresholdFromAccuracy(approxPolyAccuracy)) @@ -321,6 +322,7 @@ class OpenCVControlComponent extends React.PureComponent { const state = new core.classes.ObjectState({ shapeType: ShapeType.RECTANGLE, objectType: ObjectType.TRACK, + source: core.enums.Source.SEMI_AUTO, zOrder: curZOrder, label, points, @@ -849,6 +850,7 @@ export class ToolsControlComponent extends React.PureComponent { const object = new core.classes.ObjectState({ frame, objectType: ObjectType.SHAPE, + source: core.enums.Source.SEMI_AUTO, label: labels.length ? labels.filter((label: any) => label.id === activeLabelID)[0] : null, shapeType: ShapeType.POLYGON, points: points.flat(), @@ -870,6 +872,7 @@ export class ToolsControlComponent extends React.PureComponent { const object = new core.classes.ObjectState({ frame, objectType: ObjectType.SHAPE, + source: core.enums.Source.SEMI_AUTO, label: labels.length ? 
labels.filter((label: any) => label.id === activeLabelID)[0] : null, shapeType: ShapeType.MASK, points: maskPoints, @@ -1230,7 +1233,7 @@ export class ToolsControlComponent extends React.PureComponent { objectType: ObjectType.SHAPE, frame, occluded: false, - source: 'auto', + source: core.enums.Source.AUTO, attributes: (data.attributes as { name: string, value: string }[]) .reduce((acc, attr) => { const [modelAttr] = Object.entries(body.mapping[modelLabel].attributes) diff --git a/cvat/__init__.py b/cvat/__init__.py index 6695cdf6e2b8..3dcd99f5a63b 100644 --- a/cvat/__init__.py +++ b/cvat/__init__.py @@ -4,6 +4,6 @@ from cvat.utils.version import get_version -VERSION = (2, 4, 5, 'final', 0) +VERSION = (2, 4, 6, 'final', 0) __version__ = get_version(VERSION) diff --git a/cvat/apps/dataset_manager/annotation.py b/cvat/apps/dataset_manager/annotation.py index 7fb5b84b202b..09d18df812fa 100644 --- a/cvat/apps/dataset_manager/annotation.py +++ b/cvat/apps/dataset_manager/annotation.py @@ -421,6 +421,7 @@ def to_shapes(self, end_frame, end_skeleton_frame=None): ): shape["label_id"] = track["label_id"] shape["group"] = track["group"] + shape["source"] = track["source"] shape["track_id"] = idx shape["attributes"] += track["attributes"] shape["elements"] = [] diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 9774627b19e2..895e8eae2286 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -1755,7 +1755,7 @@ def reduce_fn(acc, v): track_id = ann.attributes.pop('track_id', None) source = ann.attributes.pop('source').lower() \ - if ann.attributes.get('source', '').lower() in {'auto', 'manual'} else 'manual' + if ann.attributes.get('source', '').lower() in {'auto', 'semi-auto', 'manual'} else 'manual' shape_type = shapes[ann.type] if track_id is None or 'keyframe' not in ann.attributes or dm_dataset.format not in ['cvat', 'datumaro', 'sly_pointcloud']: @@ -1769,7 +1769,7 @@ def reduce_fn(acc, v): element_occluded = element.visibility[0] == dm.Points.Visibility.hidden element_outside = element.visibility[0] == dm.Points.Visibility.absent element_source = element.attributes.pop('source').lower() \ - if element.attributes.get('source', '').lower() in {'auto', 'manual'} else 'manual' + if element.attributes.get('source', '').lower() in {'auto', 'semi-auto', 'manual'} else 'manual' elements.append(instance_data.LabeledShape( type=shapes[element.type], frame=frame_number, @@ -1843,7 +1843,7 @@ def reduce_fn(acc, v): for n, v in element.attributes.items() ] element_source = element.attributes.pop('source').lower() \ - if element.attributes.get('source', '').lower() in {'auto', 'manual'} else 'manual' + if element.attributes.get('source', '').lower() in {'auto', 'semi-auto', 'manual'} else 'manual' tracks[track_id]['elements'][element.label].shapes.append(instance_data.TrackedShape( type=shapes[element.type], frame=frame_number, diff --git a/cvat/apps/dataset_manager/project.py b/cvat/apps/dataset_manager/project.py index e52fb2ebab88..bf621fff7c9a 100644 --- a/cvat/apps/dataset_manager/project.py +++ b/cvat/apps/dataset_manager/project.py @@ -7,6 +7,7 @@ from tempfile import TemporaryDirectory import rq from typing import Any, Callable, List, Mapping, Tuple +from datumaro.components.errors import DatasetError, DatasetImportError, DatasetNotFoundError from django.db import transaction @@ -16,7 +17,7 @@ from cvat.apps.dataset_manager.task import TaskAnnotation from .annotation import AnnotationIR -from .bindings import 
ProjectData, load_dataset_data +from .bindings import ProjectData, load_dataset_data, CvatImportError from .formats.registry import make_exporter, make_importer def export_project(project_id, dst_file, format_name, @@ -160,7 +161,7 @@ def data(self) -> dict: raise NotImplementedError() @transaction.atomic -def import_dataset_as_project(project_id, dataset_file, format_name, conv_mask_to_poly): +def import_dataset_as_project(src_file, project_id, format_name, conv_mask_to_poly): rq_job = rq.get_current_job() rq_job.meta['status'] = 'Dataset import has been started...' rq_job.meta['progress'] = 0. @@ -170,5 +171,8 @@ def import_dataset_as_project(project_id, dataset_file, format_name, conv_mask_t project.init_from_db() importer = make_importer(format_name) - with open(dataset_file, 'rb') as f: - project.import_dataset(f, importer, conv_mask_to_poly=conv_mask_to_poly) + with open(src_file, 'rb') as f: + try: + project.import_dataset(f, importer, conv_mask_to_poly=conv_mask_to_poly) + except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex: + raise CvatImportError(str(ex)) diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index c596db9920de..452a93505f90 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -8,6 +8,7 @@ from copy import deepcopy from enum import Enum from tempfile import TemporaryDirectory +from datumaro.components.errors import DatasetError, DatasetImportError, DatasetNotFoundError from django.db import transaction from django.db.models.query import Prefetch @@ -19,11 +20,10 @@ from cvat.apps.profiler import silk_profile from .annotation import AnnotationIR, AnnotationManager -from .bindings import JobData, TaskData +from .bindings import JobData, TaskData, CvatImportError from .formats.registry import make_exporter, make_importer from .util import bulk_create - class dotdict(OrderedDict): """dot.notation access to dictionary attributes""" __getattr__ = OrderedDict.get @@ -853,19 +853,25 @@ def export_task(task_id, dst_file, format_name, server_url=None, save_images=Fal task.export(f, exporter, host=server_url, save_images=save_images) @transaction.atomic -def import_task_annotations(task_id, src_file, format_name, conv_mask_to_poly): +def import_task_annotations(src_file, task_id, format_name, conv_mask_to_poly): task = TaskAnnotation(task_id) task.init_from_db() importer = make_importer(format_name) with open(src_file, 'rb') as f: - task.import_annotations(f, importer, conv_mask_to_poly=conv_mask_to_poly) + try: + task.import_annotations(f, importer, conv_mask_to_poly=conv_mask_to_poly) + except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex: + raise CvatImportError(str(ex)) @transaction.atomic -def import_job_annotations(job_id, src_file, format_name, conv_mask_to_poly): +def import_job_annotations(src_file, job_id, format_name, conv_mask_to_poly): job = JobAnnotation(job_id) job.init_from_db() importer = make_importer(format_name) with open(src_file, 'rb') as f: - job.import_annotations(f, importer, conv_mask_to_poly=conv_mask_to_poly) + try: + job.import_annotations(f, importer, conv_mask_to_poly=conv_mask_to_poly) + except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex: + raise CvatImportError(str(ex)) diff --git a/cvat/apps/dataset_manager/tests/test_formats.py b/cvat/apps/dataset_manager/tests/test_formats.py index 86ef91efc85b..e1fa84d874d3 100644 --- a/cvat/apps/dataset_manager/tests/test_formats.py +++ b/cvat/apps/dataset_manager/tests/test_formats.py 
@@ -923,8 +923,7 @@ def _test_can_import_annotations(self, task, import_format): expected_ann = TaskAnnotation(task["id"]) expected_ann.init_from_db() - dm.task.import_task_annotations(task["id"], - file_path, import_format, True) + dm.task.import_task_annotations(file_path, task["id"], import_format, True) actual_ann = TaskAnnotation(task["id"]) actual_ann.init_from_db() @@ -976,6 +975,6 @@ def test_can_import_mots_annotations_with_splited_masks(self): task.update() task = self._create_task(task, images) - dm.task.import_task_annotations(task['id'], dataset_path, format_name, True) + dm.task.import_task_annotations(dataset_path, task['id'], format_name, True) self._test_can_import_annotations(task, format_name) diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index ba133cc69953..5d652bf7285d 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -44,7 +44,6 @@ def get_export_cache_dir(db_instance): PROJECT_CACHE_TTL = DEFAULT_CACHE_TTL / 3 JOB_CACHE_TTL = DEFAULT_CACHE_TTL - def export(dst_format, project_id=None, task_id=None, job_id=None, server_url=None, save_images=False): try: if task_id is not None: diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index fba18fb11af5..18c28b6420d2 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -13,7 +13,7 @@ import uuid from zipfile import ZipFile from datetime import datetime -from tempfile import mkstemp +from tempfile import NamedTemporaryFile import django_rq from django.conf import settings @@ -33,8 +33,10 @@ from cvat.apps.engine.log import slogger from cvat.apps.engine.serializers import (AttributeSerializer, DataSerializer, LabelSerializer, LabeledDataSerializer, SegmentSerializer, SimpleJobSerializer, TaskReadSerializer, - ProjectReadSerializer, ProjectFileSerializer, TaskFileSerializer) -from cvat.apps.engine.utils import av_scan_paths, process_failed_job, configure_dependent_job, get_rq_job_meta + ProjectReadSerializer, ProjectFileSerializer, TaskFileSerializer, RqIdSerializer) +from cvat.apps.engine.utils import ( + av_scan_paths, process_failed_job, configure_dependent_job, get_rq_job_meta, get_import_rq_id, import_resource_with_clean_up_after +) from cvat.apps.engine.models import ( StorageChoice, StorageMethodChoice, DataChoice, Task, Project, Location) from cvat.apps.engine.task import JobFileMapping, _create_thread @@ -47,7 +49,6 @@ class Version(Enum): V1 = '1.0' - def _get_label_mapping(db_labels): label_mapping = {db_label.id: db_label.name for db_label in db_labels} for db_label in db_labels: @@ -869,7 +870,7 @@ def export(db_instance, request, queue_name): if os.path.exists(file_path): return Response(status=status.HTTP_201_CREATED) elif rq_job.is_failed: - exc_info = str(rq_job.exc_info) + exc_info = rq_job.meta.get('formatted_exception', str(rq_job.exc_info)) rq_job.delete() return Response(exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) @@ -896,9 +897,11 @@ def _download_file_from_bucket(db_storage, filename, key): def _import(importer, request, queue, rq_id, Serializer, file_field_name, location_conf, filename=None): rq_job = queue.fetch_job(rq_id) + if (user_id_from_meta := getattr(rq_job, 'meta', {}).get('user', {}).get('id')) and user_id_from_meta != request.user.id: + return Response(status=status.HTTP_403_FORBIDDEN) + if not rq_job: org_id = getattr(request.iam_context['organization'], 'id', None) - fd = None dependent_job = None location = location_conf.get('location') @@ -907,10 +910,13 @@ 
def _import(importer, request, queue, rq_id, Serializer, file_field_name, locati serializer = Serializer(data=request.data) serializer.is_valid(raise_exception=True) payload_file = serializer.validated_data[file_field_name] - fd, filename = mkstemp(prefix='cvat_', dir=settings.TMP_FILES_ROOT) - with open(filename, 'wb+') as f: + with NamedTemporaryFile( + prefix='cvat_', + dir=settings.TMP_FILES_ROOT, + delete=False) as tf: + filename = tf.name for chunk in payload_file.chunks(): - f.write(chunk) + tf.write(chunk) else: file_name = request.query_params.get('filename') assert file_name, "The filename wasn't specified" @@ -926,7 +932,9 @@ def _import(importer, request, queue, rq_id, Serializer, file_field_name, locati is_default=location_conf['is_default']) key = filename - fd, filename = mkstemp(prefix='cvat_', dir=settings.TMP_FILES_ROOT) + with NamedTemporaryFile(prefix='cvat_', dir=settings.TMP_FILES_ROOT, delete=False) as tf: + filename = tf.name + dependent_job = configure_dependent_job( queue=queue, rq_id=rq_id, @@ -935,24 +943,25 @@ def _import(importer, request, queue, rq_id, Serializer, file_field_name, locati filename=filename, key=key, request=request, + result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), + failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() ) rq_job = queue.enqueue_call( - func=importer, - args=(filename, request.user.id, org_id), + func=import_resource_with_clean_up_after, + args=(importer, filename, request.user.id, org_id), job_id=rq_id, meta={ 'tmp_file': filename, - 'tmp_file_descriptor': fd, **get_rq_job_meta(request=request, db_obj=None) }, - depends_on=dependent_job + depends_on=dependent_job, + result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), + failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() ) else: if rq_job.is_finished: project_id = rq_job.return_value - if rq_job.meta['tmp_file_descriptor']: os.close(rq_job.meta['tmp_file_descriptor']) - os.remove(rq_job.meta['tmp_file']) rq_job.delete() return Response({'id': project_id}, status=status.HTTP_201_CREATED) elif rq_job.is_failed or \ @@ -969,7 +978,10 @@ def _import(importer, request, queue, rq_id, Serializer, file_field_name, locati return Response(data=exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - return Response({'rq_id': rq_id}, status=status.HTTP_202_ACCEPTED) + serializer = RqIdSerializer(data={'rq_id': rq_id}) + serializer.is_valid(raise_exception=True) + + return Response(serializer.data, status=status.HTTP_202_ACCEPTED) def get_backup_dirname(): return settings.TMP_FILES_ROOT @@ -978,7 +990,7 @@ def import_project(request, queue_name, filename=None): if 'rq_id' in request.data: rq_id = request.data['rq_id'] else: - rq_id = f"import:project.{uuid.uuid4()}-by-{request.user}" + rq_id = get_import_rq_id('project', uuid.uuid4(), 'backup', request.user) Serializer = ProjectFileSerializer file_field_name = 'project_file' @@ -1001,10 +1013,8 @@ def import_project(request, queue_name, filename=None): ) def import_task(request, queue_name, filename=None): - if 'rq_id' in request.data: - rq_id = request.data['rq_id'] - else: - rq_id = f"import:task.{uuid.uuid4()}-by-{request.user}" + rq_id = request.data.get('rq_id', get_import_rq_id('task', uuid.uuid4(), 'backup', request.user)) + Serializer = TaskFileSerializer file_field_name = 'task_file' diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index a733115496db..4f75fffe608c 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -40,9 +40,13 @@ def __init__(self, 
dimension=DimensionType.DIM_2D): self._cache = caches['media'] def _get_or_set_cache_item(self, key, create_function): + slogger.glob.info(f'Starting to get chunk from cache: key {key}') item = self._cache.get(key) + slogger.glob.info(f'Ending to get chunk from cache: key {key}, is_cached {bool(item)}') if not item: + slogger.glob.info(f'Starting to prepare chunk: key {key}') item = create_function() + slogger.glob.info(f'Ending to prepare chunk: key {key}') if item[0]: self._cache.set(key, item) diff --git a/cvat/apps/engine/handlers.py b/cvat/apps/engine/handlers.py new file mode 100644 index 000000000000..3253957dd3e0 --- /dev/null +++ b/cvat/apps/engine/handlers.py @@ -0,0 +1,22 @@ +# Copyright (C) 2023 CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +from pathlib import Path +from time import time +from django.conf import settings +from cvat.apps.engine.log import slogger + + +def clear_import_cache(path: Path, creation_time: float) -> None: + """ + Checks for and removes a stale import file that was not cleaned up by an RQ import job. + This can happen when a file has been uploaded to the CVAT server, but the corresponding RQ import job was never created. + + Args: + path (Path): path to file + creation_time (float): file creation time + """ + if path.is_file() and (time() - creation_time + 1) >= settings.IMPORT_CACHE_CLEAN_DELAY.total_seconds(): + path.unlink() + slogger.glob.warning(f"The file {str(path)} was removed by the cleaning job.") diff --git a/cvat/apps/engine/location.py b/cvat/apps/engine/location.py index e463370bca6c..d2dc669f86ef 100644 --- a/cvat/apps/engine/location.py +++ b/cvat/apps/engine/location.py @@ -5,7 +5,7 @@ from enum import Enum from typing import Any, Dict -from cvat.apps.engine.models import Location +from cvat.apps.engine.models import Location, Job class StorageType(str, Enum): TARGET = 'target_storage' @@ -20,7 +20,7 @@ def get_location_configuration(obj, field_name: str, use_settings: bool = False) } if use_settings: - storage = getattr(obj, field_name) + storage = getattr(obj, field_name) if not isinstance(obj, Job) else getattr(obj.segment.task, field_name) if storage is None: location_conf['location'] = Location.LOCAL else: diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index bec571ad5ebb..11590f4e4633 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -381,12 +381,24 @@ def get_zip_filename(self): def get_path(self, i): if self._zip_source.filename: - return os.path.join(os.path.dirname(self._zip_source.filename), self._source_path[i]) \ - if not self.extract_dir else os.path.join(self.extract_dir, self._source_path[i]) + prefix = self._get_extract_prefix() + return os.path.join(prefix, self._source_path[i]) else: # necessary for mime_type definition return self._source_path[i] + def __contains__(self, media_file): + return super().__contains__(os.path.relpath(media_file, self._get_extract_prefix())) + + def _get_extract_prefix(self): + return self.extract_dir or os.path.dirname(self._zip_source.filename) + def reconcile(self, source_files, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D, sorting_method=None): + if source_files: + # file list is expected to be a processed output of self.get_path() + # which returns files with the output directory prefix + prefix = self._get_extract_prefix() + source_files = [os.path.relpath(fn, prefix) for fn in source_files] + super().reconcile( source_files=source_files, step=step, @@ -397,7 +409,7 @@ def
reconcile(self, source_files, step=1, start=0, stop=None, dimension=Dimensio ) def extract(self): - self._zip_source.extractall(self.extract_dir if self.extract_dir else os.path.dirname(self._zip_source.filename)) + self._zip_source.extractall(self._get_extract_prefix()) if not self.extract_dir: os.remove(self._zip_source.filename) diff --git a/cvat/apps/engine/migrations/0068_auto_20230418_0901.py b/cvat/apps/engine/migrations/0068_auto_20230418_0901.py new file mode 100644 index 000000000000..f75fe6c7d982 --- /dev/null +++ b/cvat/apps/engine/migrations/0068_auto_20230418_0901.py @@ -0,0 +1,37 @@ +# Generated by Django 3.2.18 on 2023-04-18 09:01 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('engine', '0067_alter_cloudstorage_credentials_type'), + ] + + operations = [ + migrations.AlterModelOptions( + name='clientfile', + options={'default_permissions': (), 'ordering': ('id',)}, + ), + migrations.AlterModelOptions( + name='relatedfile', + options={'default_permissions': (), 'ordering': ('id',)}, + ), + migrations.AlterModelOptions( + name='remotefile', + options={'default_permissions': (), 'ordering': ('id',)}, + ), + migrations.AlterModelOptions( + name='serverfile', + options={'default_permissions': (), 'ordering': ('id',)}, + ), + migrations.AlterUniqueTogether( + name='remotefile', + unique_together={('data', 'file')}, + ), + migrations.AlterUniqueTogether( + name='serverfile', + unique_together={('data', 'file')}, + ), + ] diff --git a/cvat/apps/engine/migrations/0069_auto_20230608_1915.py b/cvat/apps/engine/migrations/0069_auto_20230608_1915.py new file mode 100644 index 000000000000..58f55cae4cf1 --- /dev/null +++ b/cvat/apps/engine/migrations/0069_auto_20230608_1915.py @@ -0,0 +1,28 @@ +# Generated by Django 3.2.18 on 2023-06-08 19:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('engine', '0068_auto_20230418_0901'), + ] + + operations = [ + migrations.AlterField( + model_name='labeledimage', + name='source', + field=models.CharField(choices=[('auto', 'AUTO'), ('semi-auto', 'SEMI_AUTO'), ('manual', 'MANUAL')], default='manual', max_length=16, null=True), + ), + migrations.AlterField( + model_name='labeledshape', + name='source', + field=models.CharField(choices=[('auto', 'AUTO'), ('semi-auto', 'SEMI_AUTO'), ('manual', 'MANUAL')], default='manual', max_length=16, null=True), + ), + migrations.AlterField( + model_name='labeledtrack', + name='source', + field=models.CharField(choices=[('auto', 'AUTO'), ('semi-auto', 'SEMI_AUTO'), ('manual', 'MANUAL')], default='manual', max_length=16, null=True), + ), + ] diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 405dce1f5330..2c9ec2abd90d 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -6,18 +6,25 @@ import base64 import json import os +import os.path import uuid from dataclasses import asdict, dataclass from distutils.util import strtobool +from pathlib import Path +from tempfile import NamedTemporaryFile from unittest import mock +import django_rq from django.conf import settings from rest_framework import mixins, status from rest_framework.response import Response from cvat.apps.engine.location import StorageType, get_location_configuration +from cvat.apps.engine.log import slogger from cvat.apps.engine.models import Location from cvat.apps.engine.serializers import DataSerializer +from cvat.apps.engine.handlers import clear_import_cache +from cvat.apps.engine.utils 
import get_import_rq_id class TusFile: @@ -145,9 +152,30 @@ def __init__(self, request): self.size = int(request.META.get("CONTENT_LENGTH", settings.TUS_DEFAULT_CHUNK_SIZE)) self.content = request.body -# This upload mixin is implemented using tus -# tus is open protocol for file uploads (see more https://tus.io/) class UploadMixin: + """ + Implements file uploads to the server. Allows to upload single and multiple files, suspend + and resume uploading. Uses the TUS open file uploading protocol (https://tus.io/). + + Implements the following protocols: + a. A single Data request + + and + + b.1. An Upload-Start request + b.2.a. The regular TUS protocol requests (Upload-Length + Chunks) + b.2.b. Upload-Multiple requests + b.3. An Upload-Finish request + + Requests: + - Data - POST, no extra headers or 'Upload-Start' + 'Upload-Finish' headers + - Upload-Start - POST, has an 'Upload-Start' header + - Upload-Length - POST, has an 'Upload-Length' header (read the TUS protocol) + - Chunk - HEAD/PATCH (read the TUS protocol) + - Upload-Finish - POST, has an 'Upload-Finish' header + - Upload-Multiple - POST, has a 'Upload-Multiple' header + """ + _tus_api_version = '1.0.0' _tus_api_version_supported = ['1.0.0'] _tus_api_extensions = [] @@ -198,11 +226,11 @@ def upload_data(self, request): if one_request_upload or finish_upload: return self.upload_finished(request) elif start_upload: - return Response(status=status.HTTP_202_ACCEPTED) + return self.upload_started(request) elif tus_request: return self.init_tus_upload(request) elif bulk_file_upload: - return self.append(request) + return self.append_files(request) else: # backward compatibility case - no upload headers were found return self.upload_finished(request) @@ -212,7 +240,7 @@ def init_tus_upload(self, request): else: metadata = self._get_metadata(request) filename = metadata.get('filename', '') - if not self.validate_filename(filename): + if not self.is_valid_uploaded_file_name(filename): return self._tus_response(status=status.HTTP_400_BAD_REQUEST, data="File name {} is not allowed".format(filename)) @@ -221,7 +249,27 @@ def init_tus_upload(self, request): if message_id: metadata["message_id"] = base64.b64decode(message_id) - file_exists = os.path.lexists(os.path.join(self.get_upload_dir(), filename)) + import_type = request.path.strip('/').split('/')[-1] + if import_type == 'backup': + # we need to create unique temp file here because + # users can try to import backups with the same name at the same time + with NamedTemporaryFile(prefix=f'cvat-backup-{filename}-by-{request.user}', suffix='.zip', dir=self.get_upload_dir()) as tmp_file: + filename = os.path.relpath(tmp_file.name, self.get_upload_dir()) + metadata['filename'] = filename + file_path = os.path.join(self.get_upload_dir(), filename) + file_exists = os.path.lexists(file_path) and import_type != 'backup' + + if file_exists: + # check whether the rq_job is in progress or has been finished/failed + object_class_name = self._object.__class__.__name__.lower() + template = get_import_rq_id(object_class_name, self._object.pk, import_type, request.user) + queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) + finished_job_ids = queue.finished_job_registry.get_job_ids() + failed_job_ids = queue.failed_job_registry.get_job_ids() + if template in finished_job_ids or template in failed_job_ids: + os.remove(file_path) + file_exists = False + if file_exists: return self._tus_response(status=status.HTTP_409_CONFLICT, data="File with same name already exists") @@ -231,11 +279,27 
@@ def init_tus_upload(self, request): return self._tus_response(status=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, data="File size exceeds max limit of {} bytes".format(self._tus_max_file_size)) + tus_file = TusFile.create_file(metadata, file_size, self.get_upload_dir()) location = request.build_absolute_uri() if 'HTTP_X_FORWARDED_HOST' not in request.META: location = request.META.get('HTTP_ORIGIN') + request.META.get('PATH_INFO') + + if import_type in ('backup', 'annotations', 'datasets'): + scheduler = django_rq.get_scheduler(settings.CVAT_QUEUES.CLEANING.value) + path = Path(self.get_upload_dir()) / tus_file.filename + cleaning_job = scheduler.enqueue_in(time_delta=settings.IMPORT_CACHE_CLEAN_DELAY, + func=clear_import_cache, + path=path, + creation_time=Path(tus_file.file_path).stat().st_ctime + ) + slogger.glob.info( + f'The cleaning job {cleaning_job.id} is queued.' + f'The check that the file {path} is deleted will be carried out after ' + f'{settings.IMPORT_CACHE_CLEAN_DELAY}.' + ) + return self._tus_response( status=status.HTTP_201_CREATED, extra_headers={'Location': '{}{}'.format(location, tus_file.file_id), @@ -268,32 +332,55 @@ def append_tus_chunk(self, request, file_id): extra_headers={'Upload-Offset': tus_file.offset, 'Upload-Filename': tus_file.filename}) - def validate_filename(self, filename): + def is_valid_uploaded_file_name(self, filename: str) -> bool: + """ + Checks the file name to be valid. + Returns True if the filename is valid, otherwise returns False. + """ + upload_dir = self.get_upload_dir() file_path = os.path.join(upload_dir, filename) return os.path.commonprefix((os.path.realpath(file_path), upload_dir)) == upload_dir - def get_upload_dir(self): + def get_upload_dir(self) -> str: return self._object.data.get_upload_dirname() - def get_request_client_files(self, request): + def _get_request_client_files(self, request): serializer = DataSerializer(self._object, data=request.data) serializer.is_valid(raise_exception=True) - data = {k: v for k, v in serializer.validated_data.items()} - return data.get('client_files', None) + return serializer.validated_data.get('client_files') - def append(self, request): - client_files = self.get_request_client_files(request) + def append_files(self, request): + """ + Processes a single or multiple files sent in a single request inside + a file uploading session. + """ + + client_files = self._get_request_client_files(request) if client_files: upload_dir = self.get_upload_dir() for client_file in client_files: - with open(os.path.join(upload_dir, client_file['file'].name), 'ab+') as destination: + filename = client_file['file'].name + if not self.is_valid_uploaded_file_name(filename): + return Response(status=status.HTTP_400_BAD_REQUEST, + data=f"File name {filename} is not allowed", content_type="text/plain") + + with open(os.path.join(upload_dir, filename), 'ab+') as destination: destination.write(client_file['file'].read()) return Response(status=status.HTTP_200_OK) - # override this to do stuff after upload + def upload_started(self, request): + """ + Allows to do actions before upcoming file uploading. + """ + return Response(status=status.HTTP_202_ACCEPTED) + def upload_finished(self, request): - raise NotImplementedError('You need to implement upload_finished in UploadMixin') + """ + Allows to process uploaded files. 
+ """ + + raise NotImplementedError('Must be implemented in the derived class') class AnnotationMixin: def export_annotations(self, request, db_obj, export_func, callback, get_data=None): @@ -330,7 +417,7 @@ def export_annotations(self, request, db_obj, export_func, callback, get_data=No data = get_data(self._object.pk) return Response(data) - def import_annotations(self, request, db_obj, import_func, rq_func, rq_id): + def import_annotations(self, request, db_obj, import_func, rq_func, rq_id_template): is_tus_request = request.headers.get('Upload-Length', None) is not None or \ request.method == 'OPTIONS' if is_tus_request: @@ -352,7 +439,7 @@ def import_annotations(self, request, db_obj, import_func, rq_func, rq_id): return import_func( request=request, - rq_id=rq_id, + rq_id_template=rq_id_template, rq_func=rq_func, db_obj=self._object, format_name=format_name, diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index c98dd9d32c11..27a4966785b4 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -264,11 +264,6 @@ def make_dirs(self): os.makedirs(self.get_original_cache_dirname()) os.makedirs(self.get_upload_dirname()) - def get_uploaded_files(self): - upload_dir = self.get_upload_dirname() - uploaded_files = [os.path.join(upload_dir, file) for file in os.listdir(upload_dir) if os.path.isfile(os.path.join(upload_dir, file))] - represented_files = [{'file':f} for f in uploaded_files] - return represented_files class Video(models.Model): data = models.OneToOneField(Data, on_delete=models.CASCADE, related_name="video", null=True) @@ -424,6 +419,10 @@ class Meta: default_permissions = () unique_together = ("data", "file") + # Some DBs can shuffle the rows. Here we restore the insertion order. + # https://github.com/opencv/cvat/pull/5083#discussion_r1038032715 + ordering = ('id', ) + # For server files on the mounted share class ServerFile(models.Model): data = models.ForeignKey(Data, on_delete=models.CASCADE, null=True, related_name='server_files') @@ -431,6 +430,11 @@ class ServerFile(models.Model): class Meta: default_permissions = () + unique_together = ("data", "file") + + # Some DBs can shuffle the rows. Here we restore the insertion order. + # https://github.com/opencv/cvat/pull/5083#discussion_r1038032715 + ordering = ('id', ) # For URLs class RemoteFile(models.Model): @@ -439,6 +443,11 @@ class RemoteFile(models.Model): class Meta: default_permissions = () + unique_together = ("data", "file") + + # Some DBs can shuffle the rows. Here we restore the insertion order. + # https://github.com/opencv/cvat/pull/5083#discussion_r1038032715 + ordering = ('id', ) class RelatedFile(models.Model): @@ -451,6 +460,10 @@ class Meta: default_permissions = () unique_together = ("data", "path") + # Some DBs can shuffle the rows. Here we restore the insertion order. 
+ # https://github.com/opencv/cvat/pull/5083#discussion_r1038032715 + ordering = ('id', ) + class Segment(models.Model): task = models.ForeignKey(Task, on_delete=models.CASCADE) start_frame = models.IntegerField() @@ -546,9 +559,9 @@ def create(cls, **kwargs): @property def organization_id(self): if self.project is not None: - return self.project.organization.id + return self.project.organization_id if self.task is not None: - return self.task.organization.id + return self.task.organization_id return None class Meta: @@ -643,6 +656,7 @@ def __str__(self): class SourceType(str, Enum): AUTO = 'auto' + SEMI_AUTO = 'semi-auto' MANUAL = 'manual' @classmethod diff --git a/cvat/apps/engine/schema.py b/cvat/apps/engine/schema.py index 060aa674ffaf..5931381b403d 100644 --- a/cvat/apps/engine/schema.py +++ b/cvat/apps/engine/schema.py @@ -4,12 +4,12 @@ import textwrap from typing import Type -from rest_framework import serializers -from drf_spectacular.utils import OpenApiParameter + from drf_spectacular.extensions import OpenApiSerializerExtension -from drf_spectacular.plumbing import force_instance, build_basic_type -from drf_spectacular.types import OpenApiTypes +from drf_spectacular.plumbing import build_basic_type, force_instance from drf_spectacular.serializers import PolymorphicProxySerializerExtension +from drf_spectacular.types import OpenApiTypes +from rest_framework import serializers def _copy_serializer( @@ -229,27 +229,5 @@ class CloudStorageReadSerializerExtension(_CloudStorageSerializerExtension): class CloudStorageWriteSerializerExtension(_CloudStorageSerializerExtension): target_class = 'cvat.apps.engine.serializers.CloudStorageWriteSerializer' -ORGANIZATION_OPEN_API_PARAMETERS = [ - OpenApiParameter( - name='org', - type=str, - required=False, - location=OpenApiParameter.QUERY, - description="Organization unique slug", - ), - OpenApiParameter( - name='org_id', - type=int, - required=False, - location=OpenApiParameter.QUERY, - description="Organization identifier", - ), - OpenApiParameter( - name='X-Organization', - type=str, - required=False, - location=OpenApiParameter.HEADER - ), -] __all__ = [] # No public symbols here diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 3fefec17b434..3394c63568ac 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -22,12 +22,10 @@ from cvat.apps.engine import models from cvat.apps.engine.cloud_provider import get_cloud_storage_instance, Credentials, Status from cvat.apps.engine.log import slogger -from cvat.apps.engine.utils import parse_specific_attributes +from cvat.apps.engine.utils import parse_specific_attributes, build_field_filter_params, get_list_view_name, reverse from drf_spectacular.utils import OpenApiExample, extend_schema_field, extend_schema_serializer -from cvat.apps.engine.utils import build_field_filter_params, get_list_view_name, reverse - class WriteOnceMixin: """ @@ -667,6 +665,9 @@ class RqStatusSerializer(serializers.Serializer): message = serializers.CharField(allow_blank=True, default="") progress = serializers.FloatField(max_value=100, default=0) +class RqIdSerializer(serializers.Serializer): + rq_id = serializers.CharField() + class JobFiles(serializers.ListField): """ @@ -762,12 +763,33 @@ class DataSerializer(serializers.ModelSerializer): """)) job_file_mapping = JobFileMapping(required=False, write_only=True) + upload_file_order = serializers.ListField( + child=serializers.CharField(max_length=1024), + default=list, allow_empty=True, 
write_only=True, + help_text=textwrap.dedent("""\ + Allows to specify file order for client_file uploads. + Only valid with the "{}" sorting method selected. + + To state that the input files are sent in the correct order, + pass an empty list. + + If you want to send files in an arbitrary order + and reorder them afterwards on the server, + pass the list of file names in the required order. + """.format(models.SortingMethod.PREDEFINED)) + ) + class Meta: model = models.Data - fields = ('chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter', - 'compressed_chunk_type', 'original_chunk_type', 'client_files', 'server_files', 'server_files_exclude','remote_files', 'use_zip_chunks', - 'cloud_storage_id', 'use_cache', 'copy_data', 'storage_method', 'storage', 'sorting_method', 'filename_pattern', - 'job_file_mapping') + fields = ( + 'chunk_size', 'size', 'image_quality', 'start_frame', 'stop_frame', 'frame_filter', + 'compressed_chunk_type', 'original_chunk_type', + 'client_files', 'server_files', 'remote_files', + 'use_zip_chunks', 'server_files_exclude', + 'cloud_storage_id', 'use_cache', 'copy_data', 'storage_method', + 'storage', 'sorting_method', 'filename_pattern', + 'job_file_mapping', 'upload_file_order', + ) extra_kwargs = { 'chunk_size': { 'help_text': "Maximum number of frames per chunk" }, 'size': { 'help_text': "The number of frames" }, @@ -857,8 +879,9 @@ def _pop_data(self, validated_data): server_files = validated_data.pop('server_files') remote_files = validated_data.pop('remote_files') - validated_data.pop('job_file_mapping', None) # optional - validated_data.pop('server_files_exclude', None) # optional + validated_data.pop('job_file_mapping', None) # optional, not present in Data + validated_data.pop('upload_file_order', None) # optional, not present in Data + validated_data.pop('server_files_exclude', None) # optional, not present in Data for extra_key in { 'use_zip_chunks', 'use_cache', 'copy_data' }: validated_data.pop(extra_key) diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 17f2ff8618f7..589f080df529 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -1,4 +1,3 @@ - # Copyright (C) 2018-2022 Intel Corporation # Copyright (C) 2022-2023 CVAT.ai Corporation # @@ -28,12 +27,12 @@ from cvat.apps.engine import models from cvat.apps.engine.log import slogger -from cvat.apps.engine.media_extractors import (MEDIA_TYPES, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter, +from cvat.apps.engine.media_extractors import (MEDIA_TYPES, ImageListReader, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter, ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort) from cvat.apps.engine.utils import av_scan_paths, get_rq_job_meta from cvat.utils.http import make_requests_session from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest -from utils.dataset_manifest.core import VideoManifestValidator +from utils.dataset_manifest.core import VideoManifestValidator, is_dataset_manifest from utils.dataset_manifest.utils import detect_related_images from .cloud_provider import db_storage_to_storage_instance @@ -250,8 +249,10 @@ def _validate_data(counter, manifest_files=None): else: multiple_entries += len(counter[media_type]) - if manifest_files and media_type not in ('video', 'image'): - raise Exception('File with meta information can only be uploaded with video/images ') + if manifest_files and media_type not in ('video', 'image', 'zip', 'archive'): + raise Exception( + 'File with 
meta information can only be uploaded with video/images/archives' + ) if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1: unique_types = ', '.join([k for k, v in MEDIA_TYPES.items() if v['unique']]) @@ -310,16 +311,19 @@ def _validate_job_file_mapping( def _validate_manifest( manifests: List[str], - root_dir: str, + root_dir: Optional[str], + *, is_in_cloud: bool, - db_cloud_storage: models.CloudStorage, + db_cloud_storage: Optional[Any], data_storage_method: str, + data_sorting_method: str, ) -> Optional[str]: if manifests: if len(manifests) != 1: raise ValidationError('Only one manifest file can be attached to data') manifest_file = manifests[0] full_manifest_path = os.path.join(root_dir, manifests[0]) + if is_in_cloud: cloud_storage_instance = db_storage_to_storage_instance(db_cloud_storage) # check that cloud storage manifest file exists and is up to date @@ -327,11 +331,24 @@ def _validate_manifest( datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) \ < cloud_storage_instance.get_file_last_modified(manifest_file): cloud_storage_instance.download_file(manifest_file, full_manifest_path) + if is_manifest(full_manifest_path): - if not (settings.USE_CACHE or data_storage_method != models.StorageMethodChoice.CACHE): - raise ValidationError("Manifest file can be uploaded only if 'Use cache' option is also selected") + if not ( + data_sorting_method == models.SortingMethod.PREDEFINED or + data_storage_method == models.StorageMethodChoice.CACHE and settings.USE_CACHE + ): + if data_storage_method == models.StorageMethodChoice.CACHE and not settings.USE_CACHE: + slogger.glob.warning("This server doesn't allow to use cache for data. " + "Please turn 'use cache' off and try to recreate the task") + + raise ValidationError( + "A manifest file can only be used with the 'use cache' option " + "or when the 'sorting_method' == 'predefined'" + ) return manifest_file + raise ValidationError('Invalid manifest was uploaded') + return None def _validate_url(url): @@ -410,6 +427,52 @@ def _download_data_from_cloud_storage( def _get_manifest_frame_indexer(start_frame=0, frame_step=1): return lambda frame_id: start_frame + frame_id * frame_step +def _read_dataset_manifest(path: str, *, create_index: bool = False) -> ImageManifestManager: + """ + Reads an upload manifest file + """ + + if not is_dataset_manifest(path): + raise ValidationError( + "Can't recognize a dataset manifest file in " + "the uploaded file '{}'".format(os.path.basename(path)) + ) + + return ImageManifestManager(path, create_index=create_index) + +def _restore_file_order_from_manifest( + extractor: ImageListReader, manifest: ImageManifestManager, upload_dir: str +) -> List[str]: + """ + Restores file ordering for the "predefined" file sorting method of the task creation. + Checks for extra files in the input. + Read more: https://github.com/opencv/cvat/issues/5061 + """ + + input_files = {os.path.relpath(p, upload_dir): p for p in extractor.absolute_source_paths} + manifest_files = list(manifest.data) + + mismatching_files = list(input_files.keys() ^ manifest_files) + if mismatching_files: + DISPLAY_ENTRIES_COUNT = 5 + mismatching_display = [ + fn + (" (upload)" if fn in input_files else " (manifest)") + for fn in mismatching_files[:DISPLAY_ENTRIES_COUNT] + ] + remaining_count = len(mismatching_files) - DISPLAY_ENTRIES_COUNT + raise FileNotFoundError( + "Uploaded files do no match the upload manifest file contents. 
" + "Please check the upload manifest file contents and the list of uploaded files. " + "Mismatching files: {}{}. " + "Read more: https://opencv.github.io/cvat/docs/manual/advanced/dataset_manifest/" + .format( + ", ".join(mismatching_display), + f" (and {remaining_count} more). " if 0 < remaining_count else "" + ) + ) + + return [input_files[fn] for fn in manifest_files] + def _create_task_manifest_based_on_cloud_storage_manifest( sorted_media: List[str], cloud_storage_manifest_prefix: str, @@ -478,13 +541,19 @@ def _create_thread( manifest_root = upload_dir elif is_data_in_cloud: manifest_root = db_data.cloud_storage.get_storage_dirname() + else: + assert False, f"Unknown file storage {db_data.storage}" manifest_file = _validate_manifest( - manifest_files, manifest_root, - is_data_in_cloud, db_data.cloud_storage if is_data_in_cloud else None, - db_data.storage_method, + manifest_files, + manifest_root, + is_in_cloud=is_data_in_cloud, + db_cloud_storage=db_data.cloud_storage if is_data_in_cloud else None, + data_storage_method=db_data.storage_method, + data_sorting_method=data['sorting_method'], ) + manifest = None if is_data_in_cloud: if manifest_file: cloud_storage_manifest = ImageManifestManager( @@ -598,6 +667,7 @@ def _create_thread( _copy_data_from_share_point( (data['server_files'] + [manifest_file]) if manifest_file else data['server_files'], upload_dir, data.get('server_files_path'), data.get('server_files_exclude')) + manifest_root = upload_dir elif is_data_in_cloud: if job_file_mapping is not None: sorted_media = list(itertools.chain.from_iterable(job_file_mapping)) @@ -619,10 +689,6 @@ def _create_thread( job.meta['status'] = 'Media files are being extracted...' job.save_meta() - db_images = [] - extractor = None - manifest_index = _get_manifest_frame_indexer() - # If upload from server_files image and directories # need to update images list by all found images in directories if (data['server_files']) and len(media['directory']) and len(media['image']): @@ -635,35 +701,53 @@ def _create_thread( ) media['directory'] = [] + if (not isBackupRestore and manifest_file and + data['sorting_method'] == models.SortingMethod.RANDOM + ): + raise ValidationError("It isn't supported to upload manifest file and use random sorting") + + if (isBackupRestore and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and + data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED} + ): + raise ValidationError( + "It isn't supported to import the task that was created " + "without cache but with random/predefined sorting" + ) + + # Extract input data + extractor = None + manifest_index = _get_manifest_frame_indexer() for media_type, media_files in media.items(): - if media_files: - if extractor is not None: - raise Exception('Combined data types are not supported') - if (isDatasetImport or isBackupRestore) and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE: - manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step()) - db_data.start_frame = 0 - data['stop_frame'] = None - db_data.frame_filter = '' - source_paths=[os.path.join(upload_dir, f) for f in media_files] - if manifest_file and not isBackupRestore and data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}: - raise Exception("It isn't supported to upload manifest file and use random sorting") - if isBackupRestore and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and \ - 
data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}: - raise Exception("It isn't supported to import the task that was created without cache but with random/predefined sorting") - - details = { - 'source_path': source_paths, - 'step': db_data.get_frame_step(), - 'start': db_data.start_frame, - 'stop': data['stop_frame'], - } - if media_type in {'archive', 'zip', 'pdf'} and db_data.storage == models.StorageChoice.SHARE: - details['extract_dir'] = db_data.get_upload_dirname() - upload_dir = db_data.get_upload_dirname() - db_data.storage = models.StorageChoice.LOCAL - if media_type != 'video': - details['sorting_method'] = data['sorting_method'] - extractor = MEDIA_TYPES[media_type]['extractor'](**details) + if not media_files: + continue + + if extractor is not None: + raise ValidationError('Combined data types are not supported') + + if (isDatasetImport or isBackupRestore) and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE: + manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step()) + db_data.start_frame = 0 + data['stop_frame'] = None + db_data.frame_filter = '' + + source_paths = [os.path.join(upload_dir, f) for f in media_files] + + details = { + 'source_path': source_paths, + 'step': db_data.get_frame_step(), + 'start': db_data.start_frame, + 'stop': data['stop_frame'], + } + if media_type in {'archive', 'zip', 'pdf'} and db_data.storage == models.StorageChoice.SHARE: + details['extract_dir'] = db_data.get_upload_dirname() + upload_dir = db_data.get_upload_dirname() + db_data.storage = models.StorageChoice.LOCAL + if media_type != 'video': + details['sorting_method'] = data['sorting_method'] + extractor = MEDIA_TYPES[media_type]['extractor'](**details) + + if extractor is None: + raise ValidationError("Can't create a task without data") # filter server_files from server_files_exclude when share point is used and files are not copied to CVAT. # here we exclude the case when the files are copied to CVAT because files are already filtered out. 
@@ -683,14 +767,22 @@ def _create_thread( if isinstance(extractor, MEDIA_TYPES['zip']['extractor']): extractor.extract() - if db_data.storage == models.StorageChoice.LOCAL or \ - (db_data.storage == models.StorageChoice.SHARE and \ - isinstance(extractor, MEDIA_TYPES['zip']['extractor'])): + validate_dimension = ValidateDimension() + if db_data.storage == models.StorageChoice.LOCAL or ( + db_data.storage == models.StorageChoice.SHARE and + isinstance(extractor, MEDIA_TYPES['zip']['extractor']) + ): validate_dimension.set_path(upload_dir) validate_dimension.validate() - if db_task.project is not None and db_task.project.tasks.count() > 1 and db_task.project.tasks.first().dimension != validate_dimension.dimension: - raise Exception(f'Dimension ({validate_dimension.dimension}) of the task must be the same as other tasks in project ({db_task.project.tasks.first().dimension})') + if (db_task.project is not None and + db_task.project.tasks.count() > 1 and + db_task.project.tasks.first().dimension != validate_dimension.dimension + ): + raise ValidationError( + f"Dimension ({validate_dimension.dimension}) of the task must be the " + f"same as other tasks in project ({db_task.project.tasks.first().dimension})" + ) if validate_dimension.dimension == models.DimensionType.DIM_3D: db_task.dimension = models.DimensionType.DIM_3D @@ -715,35 +807,51 @@ def _create_thread( extractor.filter(lambda x: not re.search(r'(^|{0})related_images{0}'.format(os.sep), x)) related_images = detect_related_images(extractor.absolute_source_paths, upload_dir) - # Sort the files - if (isBackupRestore and ( - not isinstance(extractor, MEDIA_TYPES['video']['extractor']) - and db_data.storage_method == models.StorageMethodChoice.CACHE - and db_data.sorting_method in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED} - and validate_dimension.dimension != models.DimensionType.DIM_3D - ) or job_file_mapping - ): - sorted_media_files = [] - + if validate_dimension.dimension != models.DimensionType.DIM_3D and ( + ( + not isinstance(extractor, MEDIA_TYPES['video']['extractor']) and + isBackupRestore and + db_data.storage_method == models.StorageMethodChoice.CACHE and + db_data.sorting_method in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED} + ) or ( + not isDatasetImport and + not isBackupRestore and + data['sorting_method'] == models.SortingMethod.PREDEFINED and ( + # Sorting with manifest is required for zip + isinstance(extractor, MEDIA_TYPES['zip']['extractor']) or + + # Sorting with manifest is optional for non-video + (manifest_file or manifest) and + not isinstance(extractor, MEDIA_TYPES['video']['extractor']) + ) + ) + ) or job_file_mapping: + # We should sort media_files according to the manifest content sequence + # and we should do this in general after validation step for 3D data + # and after filtering from related_images if job_file_mapping: - sorted_media_files.extend(itertools.chain.from_iterable(job_file_mapping)) + sorted_media_files = itertools.chain.from_iterable(job_file_mapping) + else: - # we should sort media_files according to the manifest content sequence - # and we should do this in general after validation step for 3D data and after filtering from related_images - manifest = ImageManifestManager(db_data.get_manifest_path()) - manifest.set_index() + if manifest is None: + if not manifest_file or not os.path.isfile(os.path.join(manifest_root, manifest_file)): + raise FileNotFoundError( + "Can't find upload manifest file '{}' " + "in the uploaded files. 
When the 'predefined' sorting method is used, " + "this file is required in the input files. " + "Read more: https://opencv.github.io/cvat/docs/manual/advanced/dataset_manifest/" + .format(manifest_file or os.path.basename(db_data.get_manifest_path())) + ) - for idx in range(len(extractor.absolute_source_paths)): - properties = manifest[idx] - image_name = properties.get('name', None) - image_extension = properties.get('extension', None) + manifest = _read_dataset_manifest(os.path.join(manifest_root, manifest_file), + create_index=manifest_root.startswith(db_data.get_upload_dirname()) + ) - full_image_path = f"{image_name}{image_extension}" if image_name and image_extension else None - if full_image_path: - sorted_media_files.append(full_image_path) + sorted_media_files = _restore_file_order_from_manifest(extractor, manifest, upload_dir) sorted_media_files = [os.path.join(upload_dir, fn) for fn in sorted_media_files] + # validate the sorting for file_path in sorted_media_files: if not file_path in extractor: raise ValidationError( @@ -815,18 +923,20 @@ def _update_status(msg): job.meta['status'] = msg job.save_meta() + db_images = [] + if settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE: for media_type, media_files in media.items(): - if not media_files: continue # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl' or 'some_manifest.jsonl') if manifest_file and not os.path.exists(db_data.get_manifest_path()): - shutil.copyfile(os.path.join(upload_dir, manifest_file), + shutil.copyfile(os.path.join(manifest_root, manifest_file), db_data.get_manifest_path()) - if upload_dir != settings.SHARE_ROOT: - os.remove(os.path.join(upload_dir, manifest_file)) + if manifest_root and manifest_root.startswith(db_data.get_upload_dirname()): + os.remove(os.path.join(manifest_root, manifest_file)) + manifest_file = os.path.relpath(db_data.get_manifest_path(), upload_dir) if task_mode == MEDIA_TYPES['video']['mode']: try: @@ -902,8 +1012,17 @@ def _update_status(msg): # check mapping if not chunk_path.endswith(f"{properties['name']}{properties['extension']}"): raise Exception('Incorrect file mapping to manifest content') - if db_task.dimension == models.DimensionType.DIM_2D: + + if db_task.dimension == models.DimensionType.DIM_2D and ( + properties.get('width') is not None and + properties.get('height') is not None + ): resolution = (properties['width'], properties['height']) + elif is_data_in_cloud: + raise Exception( + "Can't find image '{}' width or height info in the manifest" + .format(f"{properties['name']}{properties['extension']}") + ) else: resolution = extractor.get_image_size(frame_id) img_sizes.append(resolution) diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index ef91c34441dc..95a136ef453c 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -5,6 +5,7 @@ from contextlib import ExitStack import io +from itertools import product import os import random import shutil @@ -1836,9 +1837,9 @@ def _run_api_v2_projects_id_dataset_import(self, pid, user, data, f): response = self.client.post("/api/projects/{}/dataset?format={}".format(pid, f), data=data, format="multipart") return response - def _run_api_v2_projects_id_dataset_import_status(self, pid, user): + def _run_api_v2_projects_id_dataset_import_status(self, pid, user, rq_id): with ForceLogin(user, self.client): - response = self.client.get("/api/projects/{}/dataset?action=import_status".format(pid), 
format="json") + response = self.client.get("/api/projects/{}/dataset?action=import_status&rq_id={}".format(pid, rq_id), format="json") return response def test_api_v2_projects_id_export_import(self): @@ -1867,7 +1868,8 @@ def test_api_v2_projects_id_export_import(self): response = self._run_api_v2_projects_id_dataset_import(pid_import, self.owner, import_data, "CVAT 1.1") self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) - response = self._run_api_v2_projects_id_dataset_import_status(pid_import, self.owner) + rq_id = response.data.get('rq_id') + response = self._run_api_v2_projects_id_dataset_import_status(pid_import, self.owner, rq_id) self.assertEqual(response.status_code, status.HTTP_201_CREATED) def tearDown(self): @@ -3243,8 +3245,12 @@ def setUpClass(cls): for fn in image_files: f.write(os.path.join(settings.SHARE_ROOT, fn), fn) cls._share_files.append(filename) - generate_manifest_file(data_type='images', manifest_path=os.path.join(settings.SHARE_ROOT, 'manifest.jsonl'), - sources=[os.path.join(settings.SHARE_ROOT, f'test_{i}.jpg') for i in range(1,4)]) + + filename = "test_archive_2_sorted.zip" + with zipfile.ZipFile(os.path.join(settings.SHARE_ROOT, filename), 'x') as f: + for fn in sorted(image_files): + f.write(os.path.join(settings.SHARE_ROOT, fn), fn) + cls._share_files.append(filename) image_sizes, images = generate_random_image_files("test_1.jpg", "test_2.jpg", "test_3.jpg") cls._client_images = { @@ -4042,10 +4048,79 @@ def _test_api_v2_tasks_id_data_create_can_use_server_images_and_manifest(self, u self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE) - task_spec.update([('name', 'my images+manifest #27')]) - task_data.update([('copy_data', True)]) - self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, - image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL) + for copy_data in [True, False]: + with self.subTest(current_function_name(), copy=copy_data): + task_spec = task_spec_common.copy() + task_spec['name'] = task_spec['name'] + f' copy={copy_data}' + task_data['copy_data'] = copy_data + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, StorageMethodChoice.CACHE, + StorageChoice.LOCAL if copy_data else StorageChoice.SHARE) + + with self.subTest(current_function_name() + ' file order mismatch'), ExitStack() as es: + es.enter_context(self.assertRaisesMessage(Exception, + "Incorrect file mapping to manifest content" + )) + + # Suppress stacktrace spam from another thread from the expected error + es.enter_context(logging_disabled()) + + task_spec = task_spec_common.copy() + task_spec['name'] = task_spec['name'] + f' mismatching file order' + task_data_copy = task_data.copy() + task_data_copy[f'server_files[{len(images)}]'] = "images_manifest.jsonl" + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data_copy, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE) + + for copy_data in [True, False]: + with self.subTest(current_function_name(), copy=copy_data): + task_spec = task_spec_common.copy() + task_spec['name'] = task_spec['name'] + f' copy={copy_data}' + task_data['copy_data'] = copy_data + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, StorageMethodChoice.CACHE, + StorageChoice.LOCAL if copy_data 
else StorageChoice.SHARE) + + with self.subTest(current_function_name() + ' file order mismatch'), ExitStack() as es: + es.enter_context(self.assertRaisesMessage(Exception, + "Incorrect file mapping to manifest content" + )) + + # Suppress stacktrace spam from another thread from the expected error + es.enter_context(logging_disabled()) + + task_spec = task_spec_common.copy() + task_spec['name'] = task_spec['name'] + f' mismatching file order' + task_data_copy = task_data.copy() + task_data_copy[f'server_files[{len(images)}]'] = "images_manifest.jsonl" + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data_copy, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE) + + with self.subTest(current_function_name() + ' without use cache'), ExitStack() as es: + es.enter_context(self.assertRaisesMessage(Exception, + "A manifest file can only be used with the 'use cache' option" + )) + + # Suppress stacktrace spam from another thread from the expected error + es.enter_context(logging_disabled()) + + def _send_callback(*args, **kwargs): + response = self._run_api_v2_tasks_id_data_post(*args, **kwargs) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + raise Exception(response.content.decode(response.charset)) + + task_spec = task_spec_common.copy() + task_spec['name'] = task_spec['name'] + f' manifest without cache' + task_data_copy = task_data.copy() + task_data_copy['use_cache'] = False + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data_copy, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE, + send_data_callback=_send_callback) def _test_api_v2_tasks_id_data_create_can_use_server_images_with_predefined_sorting(self, user): task_spec = { @@ -4067,8 +4142,11 @@ def _test_api_v2_tasks_id_data_create_can_use_server_images_with_predefined_sort images = get_manifest_images_list(os.path.join(settings.SHARE_ROOT, manifest_name)) image_sizes = [self._share_image_sizes[v] for v in images] - for caching_enabled in [True, False]: + for caching_enabled, manifest in product( + [True, False], [True, False] + ): with self.subTest(current_function_name(), + manifest=manifest, caching_enabled=caching_enabled, ): task_data = task_data_common.copy() @@ -4079,15 +4157,216 @@ def _test_api_v2_tasks_id_data_create_can_use_server_images_with_predefined_sort else: storage_method = StorageMethodChoice.FILE_SYSTEM - task_data.update( - (f"server_files[{i}]", f) - for i, f in enumerate(images) - ) + if manifest: + task_data.update( + (f"server_files[{i}]", f) + for i, f in enumerate(reversed(images + [manifest_name])) + # Use a different order from what we have in the manifest. + # The files should be sorted during the task creation. 
+ # Then we compare them with the original manifest order + ) + else: + task_data.update( + (f"server_files[{i}]", f) + for i, f in enumerate(images) + ) self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, image_sizes, storage_method, StorageChoice.SHARE) + def _test_api_v2_tasks_id_data_create_can_use_local_images_with_predefined_sorting(self, user): + task_spec = { + "name": 'task custom data sequence client files single request #28-2', + "overlap": 0, + "segment_size": 0, + "labels": [ + {"name": "car"}, + {"name": "person"}, + ] + } + + with TestDir() as test_dir: + image_sizes, image_files = generate_random_image_files( + "test_1.jpg", "test_3.jpg", "test_5.jpg", "test_4.jpg", "test_2.jpg" + ) + image_paths = [] + for image in image_files: + fp = os.path.join(test_dir, image.name) + with open(fp, 'wb') as f: + f.write(image.getvalue()) + image_paths.append(fp) + del image_files + + task_data_common = { + "image_quality": 75, + "sorting_method": SortingMethod.PREDEFINED + } + + for (caching_enabled, manifest) in product( + [True, False], [True, False] + ): + manifest_path = os.path.join(test_dir, "manifest.jsonl") + generate_manifest_file("images", manifest_path, image_paths, + sorting_method=SortingMethod.PREDEFINED) + + task_data_common["use_cache"] = caching_enabled + if caching_enabled: + storage_method = StorageMethodChoice.CACHE + else: + storage_method = StorageMethodChoice.FILE_SYSTEM + + with self.subTest(current_function_name(), + manifest=manifest, + caching_enabled=caching_enabled, + ), ExitStack() as es: + images = [es.enter_context(open(p, 'rb')) for p in image_paths] + + task_data = task_data_common.copy() + expected_image_sizes = image_sizes + + if manifest: + manifest_file = es.enter_context(open(manifest_path)) + task_data.update( + (f"client_files[{i}]", f) + for i, f in enumerate(reversed(images)) + # Use a different order from what we have in the manifest. + # The files should be sorted during the task creation. 
+ # Then we compare them with the original manifest order + ) + task_data[f"client_files[{len(images)}]"] = manifest_file + else: + task_data.update( + (f"client_files[{i}]", f) + for i, f in enumerate(images) + ) + + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + expected_image_sizes, storage_method, StorageChoice.LOCAL) + + def _test_api_v2_tasks_id_data_create_can_use_server_archive_with_predefined_sorting(self, user): + task_spec = { + "name": 'task custom data sequence server files single request #28-3', + "overlap": 0, + "segment_size": 0, + "labels": [ + {"name": "car"}, + {"name": "person"}, + ] + } + + task_data_common = { + "image_quality": 75, + "sorting_method": SortingMethod.PREDEFINED, + } + archive_name = "test_archive_2.zip" + + for (caching_enabled, manifest) in product( + [True, False], [True, False] + ): + with self.subTest(current_function_name(), + manifest=manifest, + caching_enabled=caching_enabled, + ), ExitStack() as es: + task_data = task_data_common.copy() + + task_data["use_cache"] = caching_enabled + if caching_enabled: + storage_method = StorageMethodChoice.CACHE + else: + storage_method = StorageMethodChoice.FILE_SYSTEM + + task_data["server_files[0]"] = archive_name + + manifest_name = "images_manifest.jsonl" + images = get_manifest_images_list(os.path.join(settings.SHARE_ROOT, manifest_name)) + image_sizes = [self._share_image_sizes[v] for v in images] + + if manifest: + task_data["server_files[1]"] = manifest_name + else: + es.enter_context(self.assertRaisesMessage(FileNotFoundError, + "Can't find upload manifest file" + )) + + # Suppress stacktrace spam from another thread from the expected error + es.enter_context(logging_disabled()) + + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, storage_method, StorageChoice.LOCAL) + + def _test_api_v2_tasks_id_data_create_can_use_local_archive_with_predefined_sorting(self, user): + task_spec = { + "name": 'task custom data sequence client files single request #28-4', + "overlap": 0, + "segment_size": 0, + "labels": [ + {"name": "car"}, + {"name": "person"}, + ] + } + + with TestDir() as test_dir: + image_sizes, image_files = generate_random_image_files( + "test_1.jpg", "test_3.jpg", "test_5.jpg", "test_4.jpg", "test_2.jpg" + ) + image_paths = [] + for image in image_files: + fp = os.path.join(test_dir, image.name) + with open(fp, 'wb') as f: + f.write(image.getvalue()) + image_paths.append(fp) + + archive_path = os.path.join(test_dir, 'archive.zip') + with zipfile.ZipFile(archive_path, 'x') as archive: + for image_path in image_paths: + archive.write(image_path, os.path.relpath(image_path, test_dir)) + + del image_files + + task_data_common = { + "image_quality": 75, + "sorting_method": SortingMethod.PREDEFINED, + } + + for (caching_enabled, include_image_info, manifest) in product( + [True, False], [True, False], [True, False] + ): + with self.subTest(current_function_name(), + manifest=manifest, + caching_enabled=caching_enabled, + include_image_info=include_image_info + ), ExitStack() as es: + task_data = task_data_common.copy() + + manifest_path = os.path.join(test_dir, "manifest.jsonl") + generate_manifest_file("images", manifest_path, image_paths, + sorting_method=SortingMethod.PREDEFINED) + + task_data["use_cache"] = caching_enabled + if caching_enabled: + storage_method = StorageMethodChoice.CACHE + else: + storage_method = 
StorageMethodChoice.FILE_SYSTEM + + task_data[f"client_files[0]"] = es.enter_context(open(archive_path, 'rb')) + + if manifest: + task_data[f"client_files[1]"] = es.enter_context(open(manifest_path)) + else: + es.enter_context(self.assertRaisesMessage(FileNotFoundError, + "Can't find upload manifest file" + )) + + # Suppress stacktrace spam from another thread from the expected error + es.enter_context(logging_disabled()) + + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, storage_method, StorageChoice.LOCAL) + def _test_api_v2_tasks_id_data_create_can_use_server_images_with_natural_sorting(self, user): task_spec = { "name": 'task native data sequence #29', @@ -4138,6 +4417,146 @@ def _test_api_v2_tasks_id_data_create_can_use_server_pdf(self, user): self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL) + def _test_api_v2_tasks_id_data_create_can_send_ordered_images_with_multifile_requests(self, user): + task_spec = { + "name": 'task custom data sequence client files multi request #31', + "overlap": 0, + "segment_size": 0, + "labels": [ + {"name": "car"}, + {"name": "person"}, + ] + } + + task_data_common = { + "image_quality": 70, + "sorting_method": SortingMethod.PREDEFINED, + } + + def _send_data(tid, user, data): + response = self._run_api_v2_tasks_id_data_post(tid, user, + data={ 'image_quality': task_data["image_quality"] }, + headers={ 'Upload-Start': True }) + assert response.status_code == status.HTTP_202_ACCEPTED, response.status_code + + for group_idx, file_group in enumerate(file_groups): + request_data = {k: v for k, v in data.items() if '_files' not in k} + request_data.update({ + f'client_files[{i}]': images[f] for i, f in enumerate(file_group) + }) + + if group_idx == len(file_groups) - 1: + headers = { 'Upload-Finish': True } + request_data['upload_file_order'] = upload_info + else: + headers = { 'Upload-Multiple': True } + + response = self._run_api_v2_tasks_id_data_post(tid, user, data=request_data, + headers=headers) + + if group_idx != len(file_groups) - 1: + assert response.status_code == status.HTTP_200_OK, response.status_code + return response + + def _send_data_and_fail(*args, **kwargs): + response = _send_data(*args, **kwargs) + assert response.status_code == status.HTTP_400_BAD_REQUEST + raise Exception(response.data) + + filenames = [ + "test_1.jpg", "test_3.jpg", "test_5.jpg", "test_qwe.jpg", "test_4.jpg", "test_2.jpg" + ] + + for name, upload_info, file_groups in ( + ( + "input ordering, multiple requests, the last one has data", + [], + [ + [ 0, 1, 2 ], + [ 3, 4 ], + [ 5, ], + ] + ), + + ( + "input ordering, multiple requests, the last one has no data", + [], + [ + [ 0, 1, 2 ], + [ 3, 4, 5 ], + [ ], + ] + ), + + ( + "input ordering, multiple requests, the last one has data, has an empty request", + [], + [ + [ 0, 1, 2 ], + [ ], + [ 3, 4, 5 ], + ] + ), + + ( + "custom ordering, multiple requests, the last one has no data, files unordered", + filenames, + [ + [ 2, 4, 0 ], + [ 3, 5, 1 ], + [ ], + ] + ), + + ( + "custom ordering, multiple requests, the last one has data, files unordered", + filenames, + [ + [ 2, 0 ], + [ 3, 5, 4 ], + [ 1, ], + ] + ), + ): + with self.subTest(current_function_name() + ' ' + name): + image_sizes, images = generate_random_image_files(*filenames) + + task_data = task_data_common.copy() + task_data.update((f"client_files[{i}]", f) 
for i, f in enumerate(images)) + + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, StorageMethodChoice.FILE_SYSTEM, StorageChoice.LOCAL, + send_data_callback=_send_data) + + with self.subTest(current_function_name() + ' mismatching file sets - extra files'): + upload_info = [filenames[0]] + file_groups = [[ 0, 1 ]] + image_sizes, images = generate_random_image_files(*filenames[:2]) + + task_data = task_data_common.copy() + task_data.update((f"client_files[{i}]", f) for i, f in enumerate(images)) + + with self.assertRaisesMessage(Exception, "(extra)"): + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, StorageMethodChoice.FILE_SYSTEM, StorageChoice.LOCAL, + send_data_callback=_send_data_and_fail) + + with self.subTest(current_function_name() + ' mismatching file sets - missing files'): + upload_info = filenames[0:3] + file_groups = [[ 0, 1 ]] + image_sizes, images = generate_random_image_files(*upload_info) + + task_data = task_data_common.copy() + task_data.update((f"client_files[{i}]", f) for i, f in enumerate(images)) + + with self.assertRaisesMessage(Exception, "(missing)"): + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, + self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, StorageMethodChoice.FILE_SYSTEM, StorageChoice.LOCAL, + send_data_callback=_send_data_and_fail) + def _test_api_v2_tasks_id_data_create(self, user): method_list = { func: getattr(self, func) for func in dir(self) diff --git a/cvat/apps/engine/utils.py b/cvat/apps/engine/utils.py index 0828f40b4b60..af1ee77bc9e8 100644 --- a/cvat/apps/engine/utils.py +++ b/cvat/apps/engine/utils.py @@ -9,17 +9,21 @@ import importlib import sys import traceback -from typing import Any, Dict, Optional +from contextlib import suppress +from typing import Any, Dict, Optional, Callable, Union import subprocess import os import urllib.parse +import logging import platform + +from rq.job import Job +from django_rq.queues import DjangoRQ from pathlib import Path from django.http.request import HttpRequest from django.utils import timezone from django.utils.http import urlencode - from rest_framework.reverse import reverse as _reverse from av import VideoFrame @@ -133,19 +137,29 @@ def parse_exception_message(msg): pass return parsed_msg -def process_failed_job(rq_job): - if rq_job.meta['tmp_file_descriptor']: - os.close(rq_job.meta['tmp_file_descriptor']) - if os.path.exists(rq_job.meta['tmp_file']): - os.remove(rq_job.meta['tmp_file']) - exc_info = str(rq_job.exc_info or rq_job.dependency.exc_info) +def process_failed_job(rq_job: Job): + exc_info = str(rq_job.exc_info or getattr(rq_job.dependency, 'exc_info', None) or '') if rq_job.dependency: rq_job.dependency.delete() rq_job.delete() - return parse_exception_message(exc_info) - -def configure_dependent_job(queue, rq_id, rq_func, db_storage, filename, key, request): + msg = parse_exception_message(exc_info) + log = logging.getLogger('cvat.server.engine') + log.error(msg) + return msg + + +def configure_dependent_job( + queue: DjangoRQ, + rq_id: str, + rq_func: Callable[[Any, str, str], None], + db_storage: Any, + filename: str, + key: str, + request: HttpRequest, + result_ttl: float, + failure_ttl: float +) -> Job: rq_job_id_download_file = rq_id + f'?action=download_{filename}' rq_job_download_file = queue.fetch_job(rq_job_id_download_file) if not rq_job_download_file: @@ -155,6 +169,8 @@ def 
configure_dependent_job(queue, rq_id, rq_func, db_storage, filename, key, re args=(db_storage, filename, key), job_id=rq_job_id_download_file, meta=get_rq_job_meta(request=request, db_obj=db_storage), + result_ttl=result_ttl, + failure_ttl=failure_ttl ) return rq_job_download_file @@ -220,6 +236,27 @@ def get_list_view_name(model): 'model_name': model._meta.object_name.lower() } +def get_import_rq_id( + resource_type: str, + resource_id: int, + subresource_type: str, + user: str, +) -> str: + # import:---by- + return f"import:{resource_type}-{resource_id}-{subresource_type}-by-{user}" + +def import_resource_with_clean_up_after( + func: Union[Callable[[str, int, int], int], Callable[[str, int, str, bool], None]], + filename: str, + *args, + **kwargs, +) -> Any: + try: + result = func(filename, *args, **kwargs) + finally: + with suppress(FileNotFoundError): + os.remove(filename) + return result def get_cpu_number() -> int: cpu_number = None diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 950857db75c9..e24e9d809516 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -8,16 +8,19 @@ import os.path as osp import pytz import traceback +import textwrap +from copy import copy from datetime import datetime from distutils.util import strtobool -from tempfile import mkstemp +from tempfile import NamedTemporaryFile +from typing import Any, Dict, List, cast from django.db.models.query import Prefetch import django_rq from django.apps import apps from django.conf import settings from django.contrib.auth.models import User -from django.db import IntegrityError +from django.db import IntegrityError, transaction from django.http import HttpResponse, HttpResponseNotFound, HttpResponseBadRequest from django.utils import timezone from django.db.models import Count, Q @@ -47,7 +50,7 @@ from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.media_extractors import get_mime from cvat.apps.engine.models import ( - Job, Label, Task, Project, Issue, Data, + ClientFile, Job, Label, Task, Project, Issue, Data, Comment, StorageMethodChoice, StorageChoice, CloudProviderChoice, Location ) @@ -62,13 +65,12 @@ UserSerializer, PluginsSerializer, IssueReadSerializer, IssueWriteSerializer, CommentReadSerializer, CommentWriteSerializer, CloudStorageWriteSerializer, CloudStorageReadSerializer, DatasetFileSerializer, - ProjectFileSerializer, TaskFileSerializer, CloudStorageContentSerializer) + ProjectFileSerializer, TaskFileSerializer, RqIdSerializer, CloudStorageContentSerializer) from cvat.apps.engine.view_utils import get_cloud_storage_for_import_or_export -from cvat.apps.engine.schema import ORGANIZATION_OPEN_API_PARAMETERS from utils.dataset_manifest import ImageManifestManager from cvat.apps.engine.utils import ( - av_scan_paths, process_failed_job, configure_dependent_job, parse_exception_message, get_rq_job_meta + av_scan_paths, process_failed_job, configure_dependent_job, parse_exception_message, get_rq_job_meta, get_import_rq_id, import_resource_with_clean_up_after ) from cvat.apps.engine import backup from cvat.apps.engine.mixins import PartialUpdateModelMixin, UploadMixin, AnnotationMixin, SerializeMixin @@ -79,6 +81,7 @@ from cvat.apps.iam.permissions import (CloudStoragePermission, CommentPermission, IssuePermission, JobPermission, LabelPermission, ProjectPermission, TaskPermission, UserPermission) +from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.engine.cache import MediaCache from cvat.apps.events.handlers 
import handle_annotations_patch from cvat.apps.engine.view_utils import tus_chunk_action @@ -249,6 +252,7 @@ class ProjectViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, ordering = "-id" lookup_fields = {'owner': 'owner__username', 'assignee': 'assignee__username'} iam_organization_field = 'organization' + IMPORT_RQ_ID_TEMPLATE = get_import_rq_id('project', {}, 'dataset', {}) def get_serializer_class(self): if self.request.method in SAFE_METHODS: @@ -271,6 +275,14 @@ def perform_create(self, serializer, **kwargs): ) @extend_schema(methods=['GET'], summary='Export project as a dataset in a specific format', + description=textwrap.dedent(""" + To check the status of the process of importing a project dataset from a file: + + After initiating the dataset upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent + GET /api/projects/id/dataset requests to track the status of the dataset import. + Also you should specify action parameter: action=import_status. + """), parameters=[ OpenApiParameter('format', description='Desired output format name\n' 'You can get the list of supported formats at:\n/server/annotation/formats', @@ -287,6 +299,8 @@ def perform_create(self, serializer, **kwargs): OpenApiParameter('use_default_location', description='Use the location that was configured in project to import dataset', location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, default=True), + OpenApiParameter('rq_id', description='rq id', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], responses={ '200': OpenApiResponse(OpenApiTypes.BINARY, description='Download of file started'), @@ -294,7 +308,13 @@ def perform_create(self, serializer, **kwargs): '202': OpenApiResponse(description='Exporting has been started'), '405': OpenApiResponse(description='Format is not available'), }) - @extend_schema(methods=['POST'], summary='Import dataset in specific format as a project', + @extend_schema(methods=['POST'], + summary='Import dataset in specific format as a project or check status of dataset import process', + description=textwrap.dedent(""" + The request POST /api/projects/id/dataset will initiate file upload and will create + the rq job on the server in which the process of dataset import from a file + will be carried out. Please, use the GET /api/projects/id/dataset endpoint for checking status of the process. 
+ """), parameters=[ OpenApiParameter('format', description='Desired dataset format name\n' 'You can get the list of supported formats at:\n/server/annotation/formats', @@ -315,7 +335,7 @@ def perform_create(self, serializer, **kwargs): resource_type_field_name=None ), responses={ - '202': OpenApiResponse(description='Importing has been started'), + '202': OpenApiResponse(RqIdSerializer, description='Importing has been started'), '400': OpenApiResponse(description='Failed to import dataset'), '405': OpenApiResponse(description='Format is not available'), }) @@ -323,7 +343,6 @@ def perform_create(self, serializer, **kwargs): url_path=r'dataset/?$', parser_classes=_UPLOAD_PARSER_CLASSES) def dataset(self, request, pk): self._object = self.get_object() # force call of check_object_permissions() - rq_id = f"import:dataset-for-project.id{pk}-by-{request.user}" if request.method in {'POST', 'OPTIONS'}: return self.import_annotations( @@ -331,18 +350,26 @@ def dataset(self, request, pk): db_obj=self._object, import_func=_import_project_dataset, rq_func=dm.project.import_dataset_as_project, - rq_id=rq_id, + rq_id_template=self.IMPORT_RQ_ID_TEMPLATE ) else: action = request.query_params.get("action", "").lower() if action in ("import_status",): queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) + rq_id = request.query_params.get('rq_id') + if not rq_id: + return Response('The rq_id param should be specified in the query parameters', status=status.HTTP_400_BAD_REQUEST) + + # check that the user has access to the current rq_job + # We should not return any status of job including "404 not found" for user that has no access for this rq_job + + if self.IMPORT_RQ_ID_TEMPLATE.format(pk, request.user) != rq_id: + return Response(status=status.HTTP_403_FORBIDDEN) + rq_job = queue.fetch_job(rq_id) if rq_job is None: return Response(status=status.HTTP_404_NOT_FOUND) elif rq_job.is_finished: - if rq_job.meta['tmp_file_descriptor']: os.close(rq_job.meta['tmp_file_descriptor']) - os.remove(rq_job.meta['tmp_file']) if rq_job.dependency: rq_job.dependency.delete() rq_job.delete() @@ -395,7 +422,7 @@ def upload_finished(self, request): return _import_project_dataset( request=request, filename=uploaded_file, - rq_id=f"import:dataset-for-project.id{self._object.pk}-by-{request.user}", + rq_id_template=self.IMPORT_RQ_ID_TEMPLATE, rq_func=dm.project.import_dataset_as_project, db_obj=self._object, format_name=format_name, @@ -484,7 +511,19 @@ def annotations(self, request, pk): def export_backup(self, request, pk=None): return self.serialize(request, backup.export) - @extend_schema(summary='Methods create a project from a backup', + @extend_schema(methods=['POST'], summary='Methods create a project from a backup', + description=textwrap.dedent(""" + The backup import process is as follows: + + The first request POST /api/projects/backup will initiate file upload and will create + the rq job on the server in which the process of a project creating from an uploaded backup + will be carried out. + + After initiating the backup upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent requests + to track the status of the project creation. + Once the project has been successfully created, the server will return the id of the newly created project. 
+ """), parameters=[ *ORGANIZATION_OPEN_API_PARAMETERS, OpenApiParameter('location', description='Where to import the backup file from', @@ -494,14 +533,20 @@ def export_backup(self, request, pk=None): location=OpenApiParameter.QUERY, type=OpenApiTypes.NUMBER, required=False), OpenApiParameter('filename', description='Backup file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + OpenApiParameter('rq_id', description='rq id', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], request=PolymorphicProxySerializer('BackupWrite', serializers=[ProjectFileSerializer, OpenApiTypes.NONE], resource_type_field_name=None ), + # TODO: for some reason the code generated by the openapi generator from schema with different serializers + # contains only one serializer, need to fix that. + # https://github.com/OpenAPITools/openapi-generator/issues/6126 responses={ - '201': OpenApiResponse(description='The project has been imported'), # or better specify {id: project_id} - '202': OpenApiResponse(description='Importing a backup file has been started'), + # 201: OpenApiResponse(inline_serializer("ImportedProjectIdSerializer", fields={"id": serializers.IntegerField(required=True)}) + '201': OpenApiResponse(description='The project has been imported'), + '202': OpenApiResponse(RqIdSerializer, description='Importing a backup file has been started'), }) @action(detail=False, methods=['OPTIONS', 'POST'], url_path=r'backup/?$', serializer_class=ProjectFileSerializer(required=False), @@ -599,7 +644,6 @@ def __call__(self, request, start, stop, db_data): # mimetype detection inside sendfile will work incorrectly. path = os.path.realpath(frame_provider.get_chunk(self.number, self.quality)) return sendfile(request, path) - elif self.type == 'frame' or self.type == 'preview': if not (start <= self.number <= stop): raise ValidationError('The frame number should be in ' + @@ -701,6 +745,7 @@ class TaskViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, ordering_fields = list(filter_fields) ordering = "-id" iam_organization_field = 'organization' + IMPORT_RQ_ID_TEMPLATE = get_import_rq_id('task', {}, 'annotations', {}) def get_serializer_class(self): if self.request.method in SAFE_METHODS: @@ -718,6 +763,18 @@ def get_queryset(self): return queryset @extend_schema(summary='Method recreates a task from an attached task backup file', + description=textwrap.dedent(""" + The backup import process is as follows: + + The first request POST /api/tasks/backup will initiate file upload and will create + the rq job on the server in which the process of a task creating from an uploaded backup + will be carried out. + + After initiating the backup upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent requests + to track the status of the task creation. + Once the task has been successfully created, the server will return the id of the newly created task. 
+ """), parameters=[ *ORGANIZATION_OPEN_API_PARAMETERS, OpenApiParameter('location', description='Where to import the backup file from', @@ -727,12 +784,19 @@ def get_queryset(self): location=OpenApiParameter.QUERY, type=OpenApiTypes.NUMBER, required=False), OpenApiParameter('filename', description='Backup file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + OpenApiParameter('rq_id', description='rq id', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], request=TaskFileSerializer(required=False), + # TODO: for some reason the code generated by the openapi generator from schema with different serializers + # contains only one serializer, need to fix that. + # https://github.com/OpenAPITools/openapi-generator/issues/6126 responses={ - '201': OpenApiResponse(description='The task has been imported'), # or better specify {id: task_id} - '202': OpenApiResponse(description='Importing a backup file has been started'), + # 201: OpenApiResponse(inline_serializer("ImportedTaskIdSerializer", fields={"id": serializers.IntegerField(required=True)}) + '201': OpenApiResponse(description='The task has been imported'), + '202': OpenApiResponse(RqIdSerializer, description='Importing a backup file has been started'), }) + @action(detail=False, methods=['OPTIONS', 'POST'], url_path=r'backup/?$', serializer_class=TaskFileSerializer(required=False), parser_classes=_UPLOAD_PARSER_CLASSES) @@ -790,17 +854,87 @@ def perform_create(self, serializer, **kwargs): db_project.save() assert serializer.instance.organization == db_project.organization + def _is_data_uploading(self) -> bool: + return 'data' in self.action + # UploadMixin method def get_upload_dir(self): if 'annotations' in self.action: return self._object.get_tmp_dirname() - elif 'data' in self.action: + elif self._is_data_uploading(): return self._object.data.get_upload_dirname() elif 'backup' in self.action: return backup.get_backup_dirname() return "" + def _prepare_upload_info_entry(self, filename: str) -> str: + filename = osp.normpath(filename) + upload_dir = self.get_upload_dir() + return osp.join(upload_dir, filename) + + def _maybe_append_upload_info_entry(self, filename: str): + task_data = cast(Data, self._object.data) + + filename = self._prepare_upload_info_entry(filename) + task_data.client_files.get_or_create(file=filename) + + def _append_upload_info_entries(self, client_files: List[Dict[str, Any]]): + # batch version without optional insertion + task_data = cast(Data, self._object.data) + task_data.client_files.bulk_create([ + ClientFile(**cf, data=task_data) for cf in client_files + ]) + + def _sort_uploaded_files(self, uploaded_files: List[str], ordering: List[str]) -> List[str]: + """ + Applies file ordering for the "predefined" file sorting method of the task creation. + + Read more: https://github.com/opencv/cvat/issues/5061 + """ + + expected_files = ordering + + uploaded_file_names = set(uploaded_files) + mismatching_files = list(uploaded_file_names.symmetric_difference(expected_files)) + if mismatching_files: + DISPLAY_ENTRIES_COUNT = 5 + mismatching_display = [ + fn + (" (extra)" if fn in uploaded_file_names else " (missing)") + for fn in mismatching_files[:DISPLAY_ENTRIES_COUNT] + ] + remaining_count = len(mismatching_files) - DISPLAY_ENTRIES_COUNT + raise ValidationError( + "Uploaded files do not match the '{}' field contents. " + "Please check the uploaded data and the list of uploaded files. 
" + "Mismatching files: {}{}" + .format( + self._UPLOAD_FILE_ORDER_FIELD, + ", ".join(mismatching_display), + f" (and {remaining_count} more). " if 0 < remaining_count else "" + ) + ) + + return list(expected_files) + + # UploadMixin method + def init_tus_upload(self, request): + response = super().init_tus_upload(request) + + if self._is_data_uploading() and response.status_code == status.HTTP_201_CREATED: + self._maybe_append_upload_info_entry(response['Upload-Filename']) + + return response + + # UploadMixin method + def append_files(self, request): + client_files = self._get_request_client_files(request) + if self._is_data_uploading() and client_files: + self._append_upload_info_entries(client_files) + + return super().append_files(request) + # UploadMixin method + @transaction.atomic def upload_finished(self, request): if self.action == 'annotations': format_name = request.query_params.get("format", "") @@ -812,8 +946,7 @@ def upload_finished(self, request): return _import_annotations( request=request, filename=annotation_file, - rq_id=(f"import:annotations-for-task.id{self._object.pk}-" - f"in-{format_name.replace(' ', '_')}-by-{request.user}"), + rq_id_template=self.IMPORT_RQ_ID_TEMPLATE, rq_func=dm.task.import_task_annotations, db_obj=self._object, format_name=format_name, @@ -826,23 +959,41 @@ def upload_finished(self, request): task_data = self._object.data serializer = DataSerializer(task_data, data=request.data) serializer.is_valid(raise_exception=True) - data = dict(serializer.validated_data.items()) - uploaded_files = task_data.get_uploaded_files() - uploaded_files.extend(data.get('client_files')) - serializer.validated_data.update({'client_files': uploaded_files}) + # Append new files to the previous ones + if uploaded_files := serializer.validated_data.get('client_files', None): + self._append_upload_info_entries(uploaded_files) + serializer.validated_data['client_files'] = [] # avoid file info duplication + + # Refresh the db value with the updated file list and other request parameters db_data = serializer.save() self._object.data = db_data self._object.save() - data = {k: v for k, v in serializer.data.items()} + + # Create a temporary copy of the parameters we will try to create the task with + data = copy(serializer.data) for optional_field in ['job_file_mapping', 'server_files_exclude']: if optional_field in serializer.validated_data: data[optional_field] = serializer.validated_data[optional_field] + if ( + data['sorting_method'] == models.SortingMethod.PREDEFINED + and (uploaded_files := data['client_files']) + and ( + uploaded_file_order := serializer.validated_data[self._UPLOAD_FILE_ORDER_FIELD] + ) + ): + # In the case of predefined sorting and custom file ordering, + # the requested order must be applied + data['client_files'] = self._sort_uploaded_files( + uploaded_files, uploaded_file_order + ) + data['use_zip_chunks'] = serializer.validated_data['use_zip_chunks'] data['use_cache'] = serializer.validated_data['use_cache'] data['copy_data'] = serializer.validated_data['copy_data'] + if data['use_cache']: self._object.data.storage_method = StorageMethodChoice.CACHE self._object.data.save(update_fields=['storage_method']) @@ -852,9 +1003,9 @@ def upload_finished(self, request): if db_data.cloud_storage: self._object.data.storage = StorageChoice.CLOUD_STORAGE self._object.data.save(update_fields=['storage']) + if 'stop_frame' not in serializer.validated_data: # if the value of stop_frame is 0, then inside the function we cannot know # the value specified by the user 
or it's default value from the database - if 'stop_frame' not in serializer.validated_data: data['stop_frame'] = None task.create(self._object, data, request) return Response(serializer.data, status=status.HTTP_202_ACCEPTED) @@ -875,12 +1026,69 @@ def upload_finished(self, request): return Response(data='Unknown upload was finished', status=status.HTTP_400_BAD_REQUEST) + _UPLOAD_FILE_ORDER_FIELD = 'upload_file_order' + assert _UPLOAD_FILE_ORDER_FIELD in DataSerializer().fields + @extend_schema(methods=['POST'], - summary='Method permanently attaches images or video to a task. Supports tus uploads, see more https://tus.io/', + summary="Method permanently attaches data (images, video, etc.) to a task", + description=textwrap.dedent("""\ + Allows to upload data to a task. + Supports the TUS open file uploading protocol (https://tus.io/). + + Supports the following protocols: + + 1. A single Data request + + and + + 2.1. An Upload-Start request + 2.2.a. Regular TUS protocol requests (Upload-Length + Chunks) + 2.2.b. Upload-Multiple requests + 2.3. An Upload-Finish request + + Requests: + - Data - POST, no extra headers or 'Upload-Start' + 'Upload-Finish' headers. + Contains data in the body. + - Upload-Start - POST, has an 'Upload-Start' header. No body is expected. + - Upload-Length - POST, has an 'Upload-Length' header (see the TUS specification) + - Chunk - HEAD/PATCH (see the TUS specification). Sent to /data/ endpoints. + - Upload-Finish - POST, has an 'Upload-Finish' header. Can contain data in the body. + - Upload-Multiple - POST, has an 'Upload-Multiple' header. Contains data in the body. + + The 'Upload-Finish' request allows to specify the uploaded files should be ordered. + This may be needed if the files can be sent unordered. To state that the input files + are sent ordered, pass an empty list of files in the '{upload_file_order_field}' field. + If the files are sent unordered, the ordered file list is expected + in the '{upload_file_order_field}' field. It must be a list of string file paths, + relative to the dataset root. + + Example: + files = [ + "cats/cat_1.jpg", + "dogs/dog2.jpg", + "image_3.png", + ... + ] + + Independently of the file declaration field used + ('client_files', 'server_files', etc.), when the 'predefined' + sorting method is selected, the uploaded files will be ordered according + to the '.jsonl' manifest file, if it is found in the list of files. + For archives (e.g. '.zip'), a manifest file ('*.jsonl') is required when using + the 'predefined' file ordering. Such file must be provided next to the archive + in the list of files. Read more about manifest files here: + https://opencv.github.io/cvat/docs/manual/advanced/dataset_manifest/ + + After all data is sent, the operation status can be retrieved via + the /status endpoint. + """.format_map( + {'upload_file_order_field': _UPLOAD_FILE_ORDER_FIELD} + )), + # TODO: add a tutorial on this endpoint in the REST API docs request=DataSerializer, parameters=[ OpenApiParameter('Upload-Start', location=OpenApiParameter.HEADER, type=OpenApiTypes.BOOL, - description='Initializes data upload. No data should be sent with this header'), + description='Initializes data upload. 
Optionally, can include upload metadata in the request body.'), OpenApiParameter('Upload-Multiple', location=OpenApiParameter.HEADER, type=OpenApiTypes.BOOL, description='Indicates that data with this request are single or multiple files that should be attached to a task'), OpenApiParameter('Upload-Finish', location=OpenApiParameter.HEADER, type=OpenApiTypes.BOOL, @@ -889,7 +1097,8 @@ def upload_finished(self, request): responses={ '202': OpenApiResponse(description=''), }) - @extend_schema(methods=['GET'], summary='Method returns data for a specific task', + @extend_schema(methods=['GET'], + summary='Method returns data for a specific task', parameters=[ OpenApiParameter('type', location=OpenApiParameter.QUERY, required=False, type=OpenApiTypes.STR, enum=['chunk', 'frame', 'context_image'], @@ -964,10 +1173,19 @@ def append_data_chunk(self, request, pk, file_id): '400': OpenApiResponse(description='Exporting without data is not allowed'), '405': OpenApiResponse(description='Format is not available'), }) - @extend_schema(methods=['PUT'], summary='Method allows to upload task annotations', + @extend_schema(methods=['PUT'], summary='Method allows to upload task annotations or edit existing annotations', + description=textwrap.dedent(""" + To check the status of the process of uploading a task annotations from a file: + + After initiating the annotations upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent + PUT /api/tasks/id/annotations requests to track the status of the annotations upload. + """), parameters=[ OpenApiParameter('format', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, description='Input format name\nYou can get the list of supported formats at:\n/server/annotation/formats'), + OpenApiParameter('rq_id', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + description='rq id'), ], request=PolymorphicProxySerializer('TaskAnnotationsUpdate', serializers=[LabeledDataSerializer, AnnotationFileSerializer, OpenApiTypes.NONE], @@ -979,7 +1197,12 @@ def append_data_chunk(self, request, pk, file_id): '405': OpenApiResponse(description='Format is not available'), }) @extend_schema(methods=['POST'], - summary="Method allows to upload task annotations from a local file or a cloud storage", + summary="Method allows to initialize the process of upload task annotations from a local or a cloud storage file", + description=textwrap.dedent(""" + The request POST /api/tasks/id/annotations will initiate file upload and will create + the rq job on the server in which the process of annotations uploading from file + will be carried out. Please, use the PUT /api/tasks/id/annotations endpoint for checking status of the process. 
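A minimal client-side sketch of the annotation import flow described above, assuming a local CVAT instance with basic auth and an existing task; host, credentials, file name and format name are placeholders, while the endpoints, the rq_id parameter and the 202/201 status codes follow the descriptions in this patch.

import time
import requests

session = requests.Session()
session.auth = ("user", "password")             # placeholder credentials
base = "http://localhost:8080/api"              # placeholder host
task_id = 42                                    # placeholder: an existing task
params = {"format": "CVAT 1.1"}                 # any enabled import format

# POST starts the upload and creates the RQ import job;
# the 202 response body carries the rq_id of that job.
with open("annotations.xml", "rb") as f:
    response = session.post(
        f"{base}/tasks/{task_id}/annotations",
        params=params,
        files={"annotation_file": f},
    )
response.raise_for_status()
rq_id = response.json()["rq_id"]

# Subsequent PUT requests with the same format and the received rq_id
# report the import status: 202 - still running, 201 - finished.
while True:
    response = session.put(
        f"{base}/tasks/{task_id}/annotations",
        params={**params, "rq_id": rq_id},
    )
    if response.status_code != 202:
        break
    time.sleep(1)
response.raise_for_status()
print("annotations imported")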
+ """), parameters=[ OpenApiParameter('format', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, description='Input format name\nYou can get the list of supported formats at:\n/server/annotation/formats'), @@ -1000,7 +1223,7 @@ def append_data_chunk(self, request, pk, file_id): ), responses={ '201': OpenApiResponse(description='Uploading has finished'), - '202': OpenApiResponse(description='Uploading has been started'), + '202': OpenApiResponse(RqIdSerializer, description='Uploading has been started'), '405': OpenApiResponse(description='Format is not available'), }) @extend_schema(methods=['PATCH'], summary='Method performs a partial update of annotations in a specific task', @@ -1033,17 +1256,19 @@ def annotations(self, request, pk): return Response(data="Exporting annotations from a task without data is not allowed", status=status.HTTP_400_BAD_REQUEST) elif request.method == 'POST' or request.method == 'OPTIONS': + # NOTE: initialization process of annotations import format_name = request.query_params.get('format', '') return self.import_annotations( request=request, db_obj=self._object, import_func=_import_annotations, rq_func=dm.task.import_task_annotations, - rq_id = f"import:annotations-for-task.id{pk}-in-{format_name.replace(' ', '_')}-by-{request.user}" + rq_id_template=self.IMPORT_RQ_ID_TEMPLATE ) elif request.method == 'PUT': format_name = request.query_params.get('format', '') if format_name: + # NOTE: continue process of import annotations use_settings = strtobool(str(request.query_params.get('use_default_location', True))) conv_mask_to_poly = strtobool(request.query_params.get('conv_mask_to_poly', 'True')) obj = self._object if use_settings else request.query_params @@ -1052,7 +1277,7 @@ def annotations(self, request, pk): ) return _import_annotations( request=request, - rq_id = f"import:annotations-for-task.id{pk}-in-{format_name.replace(' ', '_')}-by-{request.user}", + rq_id_template=self.IMPORT_RQ_ID_TEMPLATE, rq_func=dm.task.import_task_annotations, db_obj=self._object, format_name=format_name, @@ -1276,6 +1501,7 @@ class JobViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, 'project_name': 'segment__task__project__name', 'assignee': 'assignee__username' } + IMPORT_RQ_ID_TEMPLATE = get_import_rq_id('job', {}, 'annotations', {}) def get_queryset(self): queryset = super().get_queryset() @@ -1294,24 +1520,21 @@ def get_serializer_class(self): # UploadMixin method def get_upload_dir(self): - task = self._object.segment.task - return task.get_tmp_dirname() + return self._object.get_tmp_dirname() # UploadMixin method def upload_finished(self, request): - task = self._object.segment.task if self.action == 'annotations': format_name = request.query_params.get("format", "") filename = request.query_params.get("filename", "") conv_mask_to_poly = strtobool(request.query_params.get('conv_mask_to_poly', 'True')) - tmp_dir = task.get_tmp_dirname() + tmp_dir = self.get_upload_dir() if os.path.isfile(os.path.join(tmp_dir, filename)): annotation_file = os.path.join(tmp_dir, filename) return _import_annotations( request=request, filename=annotation_file, - rq_id=(f"import:annotations-for-job.id{self._object.pk}-" - f"in-{format_name.replace(' ', '_')}-by-{request.user}"), + rq_id_template=self.IMPORT_RQ_ID_TEMPLATE, rq_func=dm.task.import_job_annotations, db_obj=self._object, format_name=format_name, @@ -1354,7 +1577,13 @@ def upload_finished(self, request): '202': OpenApiResponse(description='Exporting has been started'), '405': 
OpenApiResponse(description='Format is not available'), }) - @extend_schema(methods=['POST'], summary='Method allows to upload job annotations', + @extend_schema(methods=['POST'], + summary='Method allows to initialize the process of the job annotation upload from a local file or a cloud storage', + description=textwrap.dedent(""" + The request POST /api/jobs/id/annotations will initiate file upload and will create + the rq job on the server in which the process of annotations uploading from file + will be carried out. Please, use the PUT /api/jobs/id/annotations endpoint for checking status of the process. + """), parameters=[ OpenApiParameter('format', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, description='Input format name\nYou can get the list of supported formats at:\n/server/annotation/formats'), @@ -1372,13 +1601,24 @@ def upload_finished(self, request): request=AnnotationFileSerializer, responses={ '201': OpenApiResponse(description='Uploading has finished'), - '202': OpenApiResponse(description='Uploading has been started'), + '202': OpenApiResponse(RqIdSerializer, description='Uploading has been started'), '405': OpenApiResponse(description='Format is not available'), }) - @extend_schema(methods=['PUT'], summary='Method performs an update of all annotations in a specific job', + @extend_schema(methods=['PUT'], + summary='Method performs an update of all annotations in a specific job ' + 'or used for uploading annotations from a file', + description=textwrap.dedent(""" + To check the status of the process of uploading a job annotations from a file: + + After initiating the annotations upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent + PUT /api/jobs/id/annotations requests to track the status of the annotations upload. + """), parameters=[ OpenApiParameter('format', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, description='Input format name\nYou can get the list of supported formats at:\n/server/annotation/formats'), + OpenApiParameter('rq_id', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + description='rq id'), ], request=PolymorphicProxySerializer( component_name='JobAnnotationsUpdate', @@ -1420,11 +1660,10 @@ def annotations(self, request, pk): format_name = request.query_params.get('format', '') return self.import_annotations( request=request, - db_obj=self._object.segment.task, + db_obj=self._object, import_func=_import_annotations, rq_func=dm.task.import_job_annotations, - rq_id=(f"import:annotations-for-job.id{self._object.pk}-" - f"in-{format_name.replace(' ', '_')}-by-{request.user}"), + rq_id_template=self.IMPORT_RQ_ID_TEMPLATE ) elif request.method == 'PUT': @@ -1438,8 +1677,7 @@ def annotations(self, request, pk): ) return _import_annotations( request=request, - rq_id=(f"import:annotations-for-job.id{pk}-" - f"in-{format_name.replace(' ', '_')}-by-{request.user}"), + rq_id_template=self.IMPORT_RQ_ID_TEMPLATE, rq_func=dm.task.import_job_annotations, db_obj=self._object, format_name=format_name, @@ -1826,10 +2064,7 @@ class LabelViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, 'project__organization' ).all() - # NOTE: This filter works incorrectly for this view - # it requires task__organization OR project__organization check. 
- # Thus, we rely on permission-based filtering - iam_organization_field = None + iam_organization_field = ('task__organization', 'project__organization') search_fields = ('name', 'parent') filter_fields = list(search_fields) + ['id', 'type', 'color', 'parent_id'] @@ -2307,9 +2542,9 @@ def actions(self, request, pk): return HttpResponseBadRequest(msg) def rq_exception_handler(rq_job, exc_type, exc_value, tb): - rq_job.exc_info = "".join( + rq_job.meta["formatted_exception"] = "".join( traceback.format_exception_only(exc_type, exc_value)) - rq_job.save() + rq_job.save_meta() return True @@ -2320,8 +2555,9 @@ def _download_file_from_bucket(db_storage, filename, key): with open(filename, 'wb+') as f: f.write(data.getbuffer()) -def _import_annotations(request, rq_id, rq_func, db_obj, format_name, +def _import_annotations(request, rq_id_template, rq_func, db_obj, format_name, filename=None, location_conf=None, conv_mask_to_poly=True): + format_desc = {f.DISPLAY_NAME: f for f in dm.views.get_import_formats()}.get(format_name) if format_desc is None: @@ -2330,14 +2566,29 @@ def _import_annotations(request, rq_id, rq_func, db_obj, format_name, elif not format_desc.ENABLED: return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) + rq_id = request.query_params.get('rq_id') + rq_id_should_be_checked = bool(rq_id) + if not rq_id: + rq_id = rq_id_template.format(db_obj.pk, request.user) + queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) rq_job = queue.fetch_job(rq_id) + if rq_id_should_be_checked and rq_id_template.format(db_obj.pk, request.user) != rq_id: + return Response(status=status.HTTP_403_FORBIDDEN) + + if rq_job and request.method == 'POST': + # If there is a previous job that has not been deleted + if rq_job.is_finished or rq_job.is_failed: + rq_job.delete() + rq_job = queue.fetch_job(rq_id) + else: + return Response(status=status.HTTP_409_CONFLICT, data='Import job already exists') + if not rq_job: # If filename is specified we consider that file was uploaded via TUS, so it exists in filesystem # Then we dont need to create temporary file # Or filename specify key in cloud storage so we need to download file - fd = None dependent_job = None location = location_conf.get('location') if location_conf else Location.LOCAL @@ -2346,10 +2597,13 @@ def _import_annotations(request, rq_id, rq_func, db_obj, format_name, serializer = AnnotationFileSerializer(data=request.data) if serializer.is_valid(raise_exception=True): anno_file = serializer.validated_data['annotation_file'] - fd, filename = mkstemp(prefix='cvat_{}'.format(db_obj.pk), dir=settings.TMP_FILES_ROOT) - with open(filename, 'wb+') as f: + with NamedTemporaryFile( + prefix='cvat_{}'.format(db_obj.pk), + dir=settings.TMP_FILES_ROOT, + delete=False) as tf: + filename = tf.name for chunk in anno_file.chunks(): - f.write(chunk) + tf.write(chunk) else: assert filename, 'The filename was not specified' @@ -2364,7 +2618,12 @@ def _import_annotations(request, rq_id, rq_func, db_obj, format_name, is_default=location_conf['is_default']) key = filename - fd, filename = mkstemp(prefix='cvat_{}'.format(db_obj.pk), dir=settings.TMP_FILES_ROOT) + with NamedTemporaryFile( + prefix='cvat_{}'.format(db_obj.pk), + dir=settings.TMP_FILES_ROOT, + delete=False) as tf: + filename = tf.name + dependent_job = configure_dependent_job( queue=queue, rq_id=rq_id, @@ -2373,24 +2632,29 @@ def _import_annotations(request, rq_id, rq_func, db_obj, format_name, filename=filename, key=key, request=request, + 
result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), + failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() ) av_scan_paths(filename) meta = { 'tmp_file': filename, - 'tmp_file_descriptor': fd, } rq_job = queue.enqueue_call( - func=rq_func, - args=(db_obj.pk, filename, format_name, conv_mask_to_poly), + func=import_resource_with_clean_up_after, + args=(rq_func, filename, db_obj.pk, format_name, conv_mask_to_poly), job_id=rq_id, depends_on=dependent_job, - meta={**meta, **get_rq_job_meta(request=request, db_obj=db_obj)} + meta={**meta, **get_rq_job_meta(request=request, db_obj=db_obj)}, + result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), + failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() ) + serializer = RqIdSerializer(data={'rq_id': rq_id}) + serializer.is_valid(raise_exception=True) + + return Response(serializer.data, status=status.HTTP_202_ACCEPTED) else: if rq_job.is_finished: - if rq_job.meta['tmp_file_descriptor']: os.close(rq_job.meta['tmp_file_descriptor']) - os.remove(rq_job.meta['tmp_file']) rq_job.delete() return Response(status=status.HTTP_201_CREATED) elif rq_job.is_failed or \ @@ -2399,9 +2663,8 @@ def _import_annotations(request, rq_id, rq_func, db_obj, format_name, # RQ adds a prefix with exception class name import_error_prefix = '{}.{}'.format( CvatImportError.__module__, CvatImportError.__name__) - if exc_info.startswith(import_error_prefix): - exc_info = exc_info.replace(import_error_prefix + ': ', '') - return Response(data=exc_info, + if import_error_prefix in exc_info: + return Response(data="The annotations that were uploaded are not correct", status=status.HTTP_400_BAD_REQUEST) else: return Response(data=exc_info, @@ -2481,7 +2744,7 @@ def _export_annotations(db_instance, rq_id, request, format_name, action, callba if osp.exists(file_path): return Response(status=status.HTTP_201_CREATED) elif rq_job.is_failed: - exc_info = str(rq_job.exc_info) + exc_info = rq_job.meta.get('formatted_exception', str(rq_job.exc_info)) rq_job.delete() return Response(exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) @@ -2509,7 +2772,7 @@ def _export_annotations(db_instance, rq_id, request, format_name, action, callba result_ttl=ttl, failure_ttl=ttl) return Response(status=status.HTTP_202_ACCEPTED) -def _import_project_dataset(request, rq_id, rq_func, db_obj, format_name, filename=None, conv_mask_to_poly=True, location_conf=None): +def _import_project_dataset(request, rq_id_template, rq_func, db_obj, format_name, filename=None, conv_mask_to_poly=True, location_conf=None): format_desc = {f.DISPLAY_NAME: f for f in dm.views.get_import_formats()}.get(format_name) if format_desc is None: @@ -2518,21 +2781,31 @@ def _import_project_dataset(request, rq_id, rq_func, db_obj, format_name, filena elif not format_desc.ENABLED: return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) + rq_id = rq_id_template.format(db_obj.pk, request.user) + queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) rq_job = queue.fetch_job(rq_id) - if not rq_job: - fd = None + if not rq_job or rq_job.is_finished or rq_job.is_failed: + if rq_job and (rq_job.is_finished or rq_job.is_failed): + # for some reason the previous job has not been deleted + # (e.g the user closed the browser tab when job has been created + # but no one requests for checking status were not made) + rq_job.delete() dependent_job = None location = location_conf.get('location') if location_conf else None if not filename and location != Location.CLOUD_STORAGE: serializer = 
DatasetFileSerializer(data=request.data) if serializer.is_valid(raise_exception=True): dataset_file = serializer.validated_data['dataset_file'] - fd, filename = mkstemp(prefix='cvat_{}'.format(db_obj.pk), dir=settings.TMP_FILES_ROOT) - with open(filename, 'wb+') as f: + with NamedTemporaryFile( + prefix='cvat_{}'.format(db_obj.pk), + dir=settings.TMP_FILES_ROOT, + delete=False) as tf: + filename = tf.name for chunk in dataset_file.chunks(): - f.write(chunk) + tf.write(chunk) + elif location == Location.CLOUD_STORAGE: assert filename, 'The filename was not specified' try: @@ -2546,7 +2819,12 @@ def _import_project_dataset(request, rq_id, rq_func, db_obj, format_name, filena is_default=location_conf['is_default']) key = filename - fd, filename = mkstemp(prefix='cvat_{}'.format(db_obj.pk), dir=settings.TMP_FILES_ROOT) + with NamedTemporaryFile( + prefix='cvat_{}'.format(db_obj.pk), + dir=settings.TMP_FILES_ROOT, + delete=False) as tf: + filename = tf.name + dependent_job = configure_dependent_job( queue=queue, rq_id=rq_id, @@ -2555,20 +2833,26 @@ def _import_project_dataset(request, rq_id, rq_func, db_obj, format_name, filena filename=filename, key=key, request=request, + result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), + failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() ) rq_job = queue.enqueue_call( - func=rq_func, - args=(db_obj.pk, filename, format_name, conv_mask_to_poly), + func=import_resource_with_clean_up_after, + args=(rq_func, filename, db_obj.pk, format_name, conv_mask_to_poly), job_id=rq_id, meta={ 'tmp_file': filename, - 'tmp_file_descriptor': fd, **get_rq_job_meta(request=request, db_obj=db_obj), }, depends_on=dependent_job, + result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), + failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() ) else: return Response(status=status.HTTP_409_CONFLICT, data='Import job already exists') - return Response(status=status.HTTP_202_ACCEPTED) + serializer = RqIdSerializer(data={'rq_id': rq_id}) + serializer.is_valid(raise_exception=True) + + return Response(serializer.data, status=status.HTTP_202_ACCEPTED) diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index fc041e332ace..90aca64db8be 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -144,12 +144,11 @@ def export(request, filter_query, queue_name): return sendfile(request, file_path, attachment=True, attachment_filename=filename) - else: if os.path.exists(file_path): return Response(status=status.HTTP_201_CREATED) elif rq_job.is_failed: - exc_info = str(rq_job.exc_info) + exc_info = rq_job.meta.get('formatted_exception', str(rq_job.exc_info)) rq_job.delete() return Response(exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) diff --git a/cvat/apps/events/views.py b/cvat/apps/events/views.py index 1941bc5c4390..a5b067ae8a84 100644 --- a/cvat/apps/events/views.py +++ b/cvat/apps/events/views.py @@ -11,9 +11,9 @@ from cvat.apps.iam.permissions import EventsPermission +from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.events.serializers import ClientEventsSerializer from cvat.apps.engine.log import vlogger -from cvat.apps.engine.schema import ORGANIZATION_OPEN_API_PARAMETERS from .export import export class EventsViewSet(viewsets.ViewSet): diff --git a/cvat/apps/iam/filters.py b/cvat/apps/iam/filters.py index 4fd6817dccd1..a0a563ed521a 100644 --- a/cvat/apps/iam/filters.py +++ b/cvat/apps/iam/filters.py @@ -3,13 +3,65 @@ # SPDX-License-Identifier: MIT from rest_framework.filters 
import BaseFilterBackend +from django.db.models import Q +from collections.abc import Iterable + +from drf_spectacular.utils import OpenApiParameter + +ORGANIZATION_OPEN_API_PARAMETERS = [ + OpenApiParameter( + name='org', + type=str, + required=False, + location=OpenApiParameter.QUERY, + description="Organization unique slug", + ), + OpenApiParameter( + name='org_id', + type=int, + required=False, + location=OpenApiParameter.QUERY, + description="Organization identifier", + ), + OpenApiParameter( + name='X-Organization', + type=str, + required=False, + location=OpenApiParameter.HEADER, + description="Organization unique slug", + ), +] class OrganizationFilterBackend(BaseFilterBackend): - organization_slug = 'org' - organization_slug_description = 'Organization unique slug' - organization_id = 'org_id' - organization_id_description = 'Organization identifier' - organization_slug_header = 'X-Organization' + + def _parameter_is_provided(self, request): + for parameter in ORGANIZATION_OPEN_API_PARAMETERS: + if parameter.location == 'header' and parameter.name in request.headers: + return True + elif parameter.location == 'query' and parameter.name in request.query_params: + return True + return False + + def _construct_filter_query(self, organization_fields, org_id): + if isinstance(organization_fields, str): + return Q(**{organization_fields: org_id}) + + if isinstance(organization_fields, Iterable): + # we select all db records where AT LEAST ONE organization field is equal org_id + operation = Q.OR + + if org_id is None: + # but to get all non-org objects we need select db records where ALL organization fields are None + operation = Q.AND + + filter_query = Q() + for org_field in organization_fields: + filter_query.add(Q(**{org_field: org_id}), operation) + + return filter_query + + return Q() + def filter_queryset(self, request, queryset, view): # Filter works only for "list" requests and allows to return @@ -24,16 +76,14 @@ def filter_queryset(self, request, queryset, view): if org: visibility = {'organization': org.id} - elif not org and ( - self.organization_slug in request.query_params - or self.organization_id in request.query_params - or self.organization_slug_header in request.headers - ): + elif not org and self._parameter_is_provided(request): visibility = {'organization': None} if visibility: - visibility[view.iam_organization_field] = visibility.pop('organization') - return queryset.filter(**visibility).distinct() + org_id = visibility.pop("organization") + query = self._construct_filter_query(view.iam_organization_field, org_id) + + return queryset.filter(query).distinct() return queryset @@ -41,23 +91,20 @@ def get_schema_operation_parameters(self, view): if not view.iam_organization_field or view.detail: return [] - return [ - { - 'name': self.organization_slug, - 'in': 'query', - 'description': self.organization_slug_description, - 'schema': {'type': 'string'}, - }, - { - 'name': self.organization_id, - 'in': 'query', - 'description': self.organization_id_description, - 'schema': {'type': 'integer'}, - }, - { - 'name': self.organization_slug_header, - 'in': 'header', - 'description': self.organization_slug_description, - 'schema': {'type': 'string'}, - }, - ] + parameters = [] + for parameter in ORGANIZATION_OPEN_API_PARAMETERS: + parameter_type = None + + if parameter.type == int: + parameter_type = 'integer' + elif parameter.type == str: + parameter_type = 'string' + + parameters.append({ + 'name': parameter.name, + 'in': parameter.location, + 'description': 
parameter.description, + 'schema': {'type': parameter_type} + }) + + return parameters diff --git a/cvat/apps/iam/permissions.py b/cvat/apps/iam/permissions.py index da32c385e4b2..f29f6d53ad98 100644 --- a/cvat/apps/iam/permissions.py +++ b/cvat/apps/iam/permissions.py @@ -14,11 +14,12 @@ from attrs import define, field from django.conf import settings from django.db.models import Q -from rest_framework.exceptions import ValidationError, PermissionDenied +from rest_framework.exceptions import PermissionDenied, ValidationError from rest_framework.permissions import BasePermission +from cvat.apps.engine.models import (CloudStorage, Issue, Job, Label, Project, + Task) from cvat.apps.organizations.models import Membership, Organization -from cvat.apps.engine.models import CloudStorage, Label, Project, Task, Job, Issue from cvat.apps.webhooks.models import WebhookTypeChoice from cvat.utils.http import make_requests_session @@ -56,14 +57,22 @@ def get_organization(request, obj): return obj if obj: - if organization_id := getattr(obj, "organization_id", None): - try: - return Organization.objects.get(id=organization_id) - except Organization.DoesNotExist: - return None - return None + try: + organization_id = getattr(obj, 'organization_id') + except AttributeError as exc: + # Skip initialization of organization for those objects that don't related with organization + view = request.parser_context.get('view') + if view and view.basename in ('user', 'function', 'request',): + return request.iam_context['organization'] + + raise exc + + try: + return Organization.objects.get(id=organization_id) + except Organization.DoesNotExist: + return None - return request.iam_context["organization"] + return request.iam_context['organization'] def get_membership(request, organization): if organization is None: @@ -80,12 +89,13 @@ def get_iam_context(request, obj): membership = get_membership(request, organization) if organization and not request.user.is_superuser and membership is None: - raise PermissionDenied({"message": "You should be an active member in the organization"}) + raise PermissionDenied({'message': 'You should be an active member in the organization'}) return { 'user_id': request.user.id, 'group_name': request.iam_context['privilege'], 'org_id': getattr(organization, 'id', None), + 'org_slug': getattr(organization, 'slug', None), 'org_owner_id': getattr(organization.owner, 'id', None) if organization else None, 'org_role': getattr(membership, 'role', None), diff --git a/cvat/apps/iam/schema.py b/cvat/apps/iam/schema.py index 8847ae227e19..8c19ce45d82c 100644 --- a/cvat/apps/iam/schema.py +++ b/cvat/apps/iam/schema.py @@ -5,9 +5,10 @@ import re import textwrap -from drf_spectacular.openapi import AutoSchema + +from drf_spectacular.authentication import SessionScheme, TokenScheme from drf_spectacular.extensions import OpenApiAuthenticationExtension -from drf_spectacular.authentication import TokenScheme, SessionScheme +from drf_spectacular.openapi import AutoSchema class SignatureAuthenticationScheme(OpenApiAuthenticationExtension): @@ -95,3 +96,4 @@ def get_operation_id(self): tokenized_path.append('formatted') return '_'.join([tokenized_path[0]] + [action] + tokenized_path[1:]) + diff --git a/cvat/apps/lambda_manager/signals.py b/cvat/apps/lambda_manager/signals.py new file mode 100644 index 000000000000..8eeafa6f5aeb --- /dev/null +++ b/cvat/apps/lambda_manager/signals.py @@ -0,0 +1,7 @@ +# Copyright (C) 2023 CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +from django.dispatch import 
Signal + +interactive_function_call_signal = Signal() diff --git a/cvat/apps/lambda_manager/views.py b/cvat/apps/lambda_manager/views.py index b5a0ca91824c..f3c5f564b0f4 100644 --- a/cvat/apps/lambda_manager/views.py +++ b/cvat/apps/lambda_manager/views.py @@ -19,6 +19,7 @@ import numpy as np import requests import rq +from cvat.apps.lambda_manager.signals import interactive_function_call_signal from django.conf import settings from django.core.exceptions import ObjectDoesNotExist, ValidationError from drf_spectacular.types import OpenApiTypes @@ -27,14 +28,15 @@ inline_serializer) from rest_framework import serializers, status, viewsets from rest_framework.response import Response +from rest_framework.request import Request import cvat.apps.dataset_manager as dm from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.models import Job, ShapeType, SourceType, Task from cvat.apps.engine.serializers import LabeledDataSerializer -from cvat.apps.engine.schema import ORGANIZATION_OPEN_API_PARAMETERS from cvat.utils.http import make_requests_session from cvat.apps.iam.permissions import LambdaPermission +from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS class LambdaType(Enum): @@ -194,7 +196,15 @@ def to_dict(self): return response - def invoke(self, db_task: Task, data: Dict[str, Any], *, db_job: Optional[Job] = None): + def invoke( + self, + db_task: Task, + data: Dict[str, Any], + *, + db_job: Optional[Job] = None, + is_interactive: Optional[bool] = False, + request: Optional[Request] = None + ): try: if db_job is not None and db_job.get_task_id() != db_task.id: raise ValidationError("Job task id does not match task id", @@ -299,7 +309,11 @@ def invoke(self, db_task: Task, data: Dict[str, Any], *, db_job: Optional[Job] = .format(self.id, str(err)), code=status.HTTP_400_BAD_REQUEST) + if is_interactive and request: + interactive_function_call_signal.send(sender=self, request=request) + response = self.gateway.invoke(self, payload) + response_filtered = [] def check_attr_value(value, func_attr, db_attr): if db_attr is None: @@ -788,7 +802,7 @@ def call(self, request, func_id): gateway = LambdaGateway() lambda_func = gateway.get(func_id) - return lambda_func.invoke(db_task, request.data, db_job=job) + return lambda_func.invoke(db_task, request.data, db_job=job, is_interactive=True, request=request) @extend_schema(tags=['lambda']) @extend_schema_view( diff --git a/cvat/apps/organizations/views.py b/cvat/apps/organizations/views.py index 13f757018e98..a8576273b9c3 100644 --- a/cvat/apps/organizations/views.py +++ b/cvat/apps/organizations/views.py @@ -9,10 +9,10 @@ from drf_spectacular.utils import OpenApiResponse, extend_schema, extend_schema_view from cvat.apps.engine.mixins import PartialUpdateModelMixin -from cvat.apps.engine.schema import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.iam.permissions import ( InvitationPermission, MembershipPermission, OrganizationPermission) +from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from .models import Invitation, Membership, Organization from .serializers import ( diff --git a/cvat/apps/webhooks/views.py b/cvat/apps/webhooks/views.py index e71aa5c06b23..afc725fa83b6 100644 --- a/cvat/apps/webhooks/views.py +++ b/cvat/apps/webhooks/views.py @@ -10,9 +10,9 @@ from rest_framework.permissions import SAFE_METHODS from rest_framework.response import Response -from cvat.apps.engine.schema import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.engine.view_utils import list_action, 
make_paginated_response from cvat.apps.iam.permissions import WebhookPermission +from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from .event_type import AllEvents, OrganizationEvents, ProjectEvents from .models import Webhook, WebhookDelivery, WebhookTypeChoice diff --git a/cvat/asgi.py b/cvat/asgi.py new file mode 100644 index 000000000000..dda3ac5ac4ec --- /dev/null +++ b/cvat/asgi.py @@ -0,0 +1,41 @@ +# Copyright (C) 2023 CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +""" +ASGI config for CVAT project. + +It exposes the ASGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/3.2/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application +from django.core.handlers.asgi import ASGIHandler + +import cvat.utils.remote_debugger as debug + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cvat.settings.{}" \ + .format(os.environ.get("DJANGO_CONFIGURATION", "development"))) + +application = get_asgi_application() + + +if debug.is_debugging_enabled(): + class DebuggerApp(ASGIHandler): + """ + Support for VS code debugger + """ + + def __init__(self) -> None: + super().__init__() + self.__debugger = debug.RemoteDebugger() + + async def handle(self, *args, **kwargs): + self.__debugger.attach_current_thread() + return await super().handle(*args, **kwargs) + + application = DebuggerApp() diff --git a/cvat/nginx.conf b/cvat/nginx.conf new file mode 100644 index 000000000000..467b7b884e52 --- /dev/null +++ b/cvat/nginx.conf @@ -0,0 +1,90 @@ +worker_processes auto; +pid /tmp/nginx.pid; +include /etc/nginx/modules-enabled/*.conf; + +events { + worker_connections 512; +} + +error_log /home/django/logs/nginx_error.log; + +http { + + ## + # Basic Settings + ## + + sendfile on; + tcp_nopush on; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + ## + # Override default tmp paths to run nginx from non-root user + ## + + proxy_temp_path /tmp/nginx_proxy; + uwsgi_temp_path /tmp/nginx_uwsgi; + scgi_temp_path /tmp/nginx_scgi; + fastcgi_temp_path /tmp/nginx_fastcgi; + client_body_temp_path /tmp/nginx_body; + + ## + # Logging Settings + ## + + access_log /home/django/logs/nginx_access.log; + + ## + # CVAT Settings + ## + + server { + listen 8080; + # previously used value + client_max_body_size 1G; + + server_name _; + + location /static/ { + gzip on; + gzip_comp_level 6; + gzip_http_version 1.1; + gzip_types + application/javascript + application/x-javascript + text/javascript + application/wasm + image/x-icon; + + alias /home/django/static/; + } + + location /data/ { + internal; + alias /home/django/data/; + } + + location / { + proxy_set_header Host $http_host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_redirect off; + proxy_buffering off; + proxy_pass http://uvicorn; + } + } + + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + upstream uvicorn { + server localhost:8000; + } +} diff --git a/cvat/requirements/base.in b/cvat/requirements/base.in index 933548869798..7c773219336f 100644 --- a/cvat/requirements/base.in +++ b/cvat/requirements/base.in @@ -50,7 +50,7 @@ diskcache==5.4.0 boto3==1.17.61 azure-storage-blob==12.13.0 google-cloud-storage==1.42.0 
-git+https://github.com/cvat-ai/datumaro.git@0817144ade1ddc514e182ca1835e322cb9af00a0 +git+https://github.com/cvat-ai/datumaro.git@ff83c00c2c1bc4b8fdfcc55067fcab0a9b5b6b11 urllib3>=1.26.5 # not directly required, pinned by Snyk to avoid a vulnerability natsort==8.0.0 mistune>=2.0.1 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index 59339e0dd9ad..2f18e19e7dcf 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -1,4 +1,4 @@ -# SHA1:53feeaa402abed516aad4a640244c5fd1bff765a +# SHA1:d1435558d66ec49d0c691492b2f3798960ca3bba # # This file is autogenerated by pip-compile-multi # To update, run: @@ -12,7 +12,7 @@ absl-py==1.4.0 # via # tensorboard # tensorflow -asgiref==3.6.0 +asgiref==3.7.1 # via django astunparse==1.6.3 # via tensorflow @@ -58,15 +58,15 @@ coreapi==2.3.3 # via -r cvat/requirements/base.in coreschema==0.0.4 # via coreapi -croniter==1.3.14 +croniter==1.3.15 # via rq-scheduler -cryptography==40.0.2 +cryptography==41.0.0 # via # azure-storage-blob # pyjwt cycler==0.11.0 # via matplotlib -datumaro @ git+https://github.com/cvat-ai/datumaro.git@0817144ade1ddc514e182ca1835e322cb9af00a0 +datumaro @ git+https://github.com/cvat-ai/datumaro.git@ff83c00c2c1bc4b8fdfcc55067fcab0a9b5b6b11 # via -r cvat/requirements/base.in defusedxml==0.7.1 # via @@ -234,7 +234,7 @@ opt-einsum==3.3.0 # via tensorflow orderedmultidict==1.0.1 # via furl -orjson==3.8.12 +orjson==3.8.13 # via datumaro packaging==23.1 # via @@ -390,8 +390,9 @@ tensorflow-io-gcs-filesystem==0.32.0 # via tensorflow termcolor==2.3.0 # via tensorflow -typing-extensions==4.5.0 +typing-extensions==4.6.2 # via + # asgiref # azure-core # datumaro # limits @@ -402,7 +403,7 @@ uritemplate==4.1.1 # via # coreapi # drf-spectacular -urllib3==1.26.15 +urllib3==1.26.16 # via # -r cvat/requirements/base.in # botocore diff --git a/cvat/requirements/production.in b/cvat/requirements/production.in index 34c6b2a1ad0b..530545b6a4c0 100644 --- a/cvat/requirements/production.in +++ b/cvat/requirements/production.in @@ -1,2 +1,2 @@ -r base.in -mod-wsgi==4.9.4 +uvicorn[standard]==0.22.0 diff --git a/cvat/requirements/production.txt b/cvat/requirements/production.txt index fb9625f7d558..a142cfdc6af2 100644 --- a/cvat/requirements/production.txt +++ b/cvat/requirements/production.txt @@ -1,4 +1,4 @@ -# SHA1:2ed11382b8b8c472f9271bd4bd0701d52103ebd2 +# SHA1:d3d4b2262fd87a700593e22be8811e6d04230e40 # # This file is autogenerated by pip-compile-multi # To update, run: @@ -8,7 +8,23 @@ -r base.txt --no-binary av -mod-wsgi==4.9.4 +anyio==3.6.2 + # via watchfiles +h11==0.14.0 + # via uvicorn +httptools==0.5.0 + # via uvicorn +python-dotenv==1.0.0 + # via uvicorn +sniffio==1.3.0 + # via anyio +uvicorn[standard]==0.22.0 # via -r cvat/requirements/production.in +uvloop==0.17.0 + # via uvicorn +watchfiles==0.19.0 + # via uvicorn +websockets==11.0.3 + # via uvicorn # The following packages are considered to be unsafe in a requirements file: diff --git a/cvat/rqworker.py b/cvat/rqworker.py index b21a8c8f7bd0..2d68a419f95b 100644 --- a/cvat/rqworker.py +++ b/cvat/rqworker.py @@ -34,6 +34,14 @@ def main_work_horse(self, *args, **kwargs): def execute_job(self, *args, **kwargs): """Execute job in same thread/process, do not fork()""" + + # Resolves problems with + # django.db.utils.OperationalError: server closed the connection unexpectedly + # errors during debugging + # 
https://stackoverflow.com/questions/8242837/django-multiprocessing-and-database-connections/10684672#10684672 + from django import db + db.connections.close_all() + return self.perform_job(*args, **kwargs) diff --git a/cvat/schema.yml b/cvat/schema.yml index a46a3b97139b..2fb2c5204b6f 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -1,7 +1,7 @@ openapi: 3.0.3 info: title: CVAT REST API - version: 2.4.5 + version: 2.4.6 description: REST API for Computer Vision Annotation Tool (CVAT) termsOfService: https://www.google.com/policies/terms/ contact: @@ -338,6 +338,7 @@ paths: name: X-Organization schema: type: string + description: Organization unique slug - in: query name: org schema: @@ -796,6 +797,7 @@ paths: name: X-Organization schema: type: string + description: Organization unique slug - in: query name: org schema: @@ -991,6 +993,7 @@ paths: name: X-Organization schema: type: string + description: Organization unique slug - in: query name: org schema: @@ -1101,6 +1104,7 @@ paths: name: X-Organization schema: type: string + description: Organization unique slug - in: query name: org schema: @@ -1315,6 +1319,7 @@ paths: name: X-Organization schema: type: string + description: Organization unique slug - in: query name: org schema: @@ -1676,7 +1681,13 @@ paths: description: Format is not available post: operationId: jobs_create_annotations - summary: Method allows to upload job annotations + description: |2 + + The request POST /api/jobs/id/annotations will initiate file upload and will create + the rq job on the server in which the process of annotations uploading from file + will be carried out. Please, use the PUT /api/jobs/id/annotations endpoint for checking status of the process. + summary: Method allows to initialize the process of the job annotation upload + from a local file or a cloud storage parameters: - in: query name: cloud_storage_id @@ -1737,12 +1748,24 @@ paths: '201': description: Uploading has finished '202': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/RqId' description: Uploading has been started '405': description: Format is not available put: operationId: jobs_update_annotations - summary: Method performs an update of all annotations in a specific job + description: |2 + + To check the status of the process of uploading a job annotations from a file: + + After initiating the annotations upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent + PUT /api/jobs/id/annotations requests to track the status of the annotations upload. + summary: Method performs an update of all annotations in a specific job or used + for uploading annotations from a file parameters: - in: query name: format @@ -1758,6 +1781,11 @@ paths: type: integer description: A unique integer value identifying this job. 
required: true + - in: query + name: rq_id + schema: + type: string + description: rq id tags: - jobs requestBody: @@ -2052,6 +2080,7 @@ paths: name: X-Organization schema: type: string + description: Organization unique slug - name: color in: query description: A simple equality filter for the color field @@ -2337,6 +2366,7 @@ paths: name: X-Organization schema: type: string + description: Organization unique slug - in: query name: org schema: @@ -2807,6 +2837,7 @@ paths: name: X-Organization schema: type: string + description: Organization unique slug - in: query name: org schema: @@ -3051,6 +3082,14 @@ paths: /api/projects/{id}/dataset/: get: operationId: projects_retrieve_dataset + description: |2 + + To check the status of the process of importing a project dataset from a file: + + After initiating the dataset upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent + GET /api/projects/id/dataset requests to track the status of the dataset import. + Also you should specify action parameter: action=import_status. summary: Export project as a dataset in a specific format parameters: - in: query @@ -3094,6 +3133,11 @@ paths: - cloud_storage - local description: Where need to save downloaded dataset + - in: query + name: rq_id + schema: + type: string + description: rq id - in: query name: use_default_location schema: @@ -3124,7 +3168,13 @@ paths: description: Format is not available post: operationId: projects_create_dataset - summary: Import dataset in specific format as a project + description: |2 + + The request POST /api/projects/id/dataset will initiate file upload and will create + the rq job on the server in which the process of dataset import from a file + will be carried out. Please, use the GET /api/projects/id/dataset endpoint for checking status of the process. + summary: Import dataset in specific format as a project or check status of dataset + import process parameters: - in: query name: cloud_storage_id @@ -3183,6 +3233,10 @@ paths: - basicAuth: [] responses: '202': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/RqId' description: Importing has been started '400': description: Failed to import dataset @@ -3215,12 +3269,25 @@ paths: /api/projects/backup/: post: operationId: projects_create_backup + description: |2 + + The backup import process is as follows: + + The first request POST /api/projects/backup will initiate file upload and will create + the rq job on the server in which the process of a project creating from an uploaded backup + will be carried out. + + After initiating the backup upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent requests + to track the status of the project creation. + Once the project has been successfully created, the server will return the id of the newly created project. 
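For the /api/projects/{id}/dataset/ import documented above, a minimal polling sketch, assuming a local instance, basic auth and an existing project; host, credentials, ids, file and format names are placeholders, while the rq_id parameter and the action=import_status check follow the descriptions. The backup flow described immediately above is sketched separately after the analogous /api/tasks/backup/ description.

import time
import requests

session = requests.Session()
session.auth = ("user", "password")                       # placeholder credentials
url = "http://localhost:8080/api/projects/7/dataset/"     # placeholder project id

# POST uploads the dataset file and creates the RQ import job;
# the 202 response carries the rq_id of that job.
with open("dataset.zip", "rb") as f:
    response = session.post(
        url,
        params={"format": "COCO 1.0"},
        files={"dataset_file": f},
    )
response.raise_for_status()
rq_id = response.json()["rq_id"]

# GET with action=import_status and the received rq_id tracks the import:
# 202 - still running, 201 - finished.
while True:
    response = session.get(url, params={"action": "import_status", "rq_id": rq_id})
    if response.status_code != 202:
        break
    time.sleep(1)
response.raise_for_status()
print("dataset imported")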
summary: Methods create a project from a backup parameters: - in: header name: X-Organization schema: type: string + description: Organization unique slug - in: query name: cloud_storage_id schema: @@ -3250,6 +3317,11 @@ paths: schema: type: integer description: Organization identifier + - in: query + name: rq_id + schema: + type: string + description: rq id tags: - projects requestBody: @@ -3270,6 +3342,10 @@ paths: '201': description: The project has been imported '202': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/RqId' description: Importing a backup file has been started /api/schema/: get: @@ -3640,6 +3716,7 @@ paths: name: X-Organization schema: type: string + description: Organization unique slug - in: query name: org schema: @@ -3825,8 +3902,13 @@ paths: description: Format is not available post: operationId: tasks_create_annotations - summary: Method allows to upload task annotations from a local file or a cloud - storage + description: |2 + + The request POST /api/tasks/id/annotations will initiate file upload and will create + the rq job on the server in which the process of annotations uploading from file + will be carried out. Please, use the PUT /api/tasks/id/annotations endpoint for checking status of the process. + summary: Method allows to initialize the process of upload task annotations + from a local or a cloud storage file parameters: - in: query name: cloud_storage_id @@ -3886,12 +3968,23 @@ paths: '201': description: Uploading has finished '202': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/RqId' description: Uploading has been started '405': description: Format is not available put: operationId: tasks_update_annotations - summary: Method allows to upload task annotations + description: |2 + + To check the status of the process of uploading a task annotations from a file: + + After initiating the annotations upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent + PUT /api/tasks/id/annotations requests to track the status of the annotations upload. + summary: Method allows to upload task annotations or edit existing annotations parameters: - in: query name: format @@ -3907,6 +4000,11 @@ paths: type: integer description: A unique integer value identifying this task. required: true + - in: query + name: rq_id + schema: + type: string + description: rq id tags: - tasks requestBody: @@ -4097,8 +4195,57 @@ paths: description: Data of a specific type post: operationId: tasks_create_data - summary: Method permanently attaches images or video to a task. Supports tus - uploads, see more https://tus.io/ + description: | + Allows to upload data to a task. + Supports the TUS open file uploading protocol (https://tus.io/). + + Supports the following protocols: + + 1. A single Data request + + and + + 2.1. An Upload-Start request + 2.2.a. Regular TUS protocol requests (Upload-Length + Chunks) + 2.2.b. Upload-Multiple requests + 2.3. An Upload-Finish request + + Requests: + - Data - POST, no extra headers or 'Upload-Start' + 'Upload-Finish' headers. + Contains data in the body. + - Upload-Start - POST, has an 'Upload-Start' header. No body is expected. + - Upload-Length - POST, has an 'Upload-Length' header (see the TUS specification) + - Chunk - HEAD/PATCH (see the TUS specification). Sent to /data/ endpoints. + - Upload-Finish - POST, has an 'Upload-Finish' header. Can contain data in the body. 
+ - Upload-Multiple - POST, has an 'Upload-Multiple' header. Contains data in the body. + + The 'Upload-Finish' request allows to specify the uploaded files should be ordered. + This may be needed if the files can be sent unordered. To state that the input files + are sent ordered, pass an empty list of files in the 'upload_file_order' field. + If the files are sent unordered, the ordered file list is expected + in the 'upload_file_order' field. It must be a list of string file paths, + relative to the dataset root. + + Example: + files = [ + "cats/cat_1.jpg", + "dogs/dog2.jpg", + "image_3.png", + ... + ] + + Independently of the file declaration field used + ('client_files', 'server_files', etc.), when the 'predefined' + sorting method is selected, the uploaded files will be ordered according + to the '.jsonl' manifest file, if it is found in the list of files. + For archives (e.g. '.zip'), a manifest file ('*.jsonl') is required when using + the 'predefined' file ordering. Such file must be provided next to the archive + in the list of files. Read more about manifest files here: + https://opencv.github.io/cvat/docs/manual/advanced/dataset_manifest/ + + After all data is sent, the operation status can be retrieved via + the /status endpoint. + summary: Method permanently attaches data (images, video, etc.) to a task parameters: - in: header name: Upload-Finish @@ -4116,7 +4263,8 @@ paths: name: Upload-Start schema: type: boolean - description: Initializes data upload. No data should be sent with this header + description: Initializes data upload. Optionally, can include upload metadata + in the request body. - in: path name: id schema: @@ -4331,12 +4479,25 @@ paths: /api/tasks/backup/: post: operationId: tasks_create_backup + description: |2 + + The backup import process is as follows: + + The first request POST /api/tasks/backup will initiate file upload and will create + the rq job on the server in which the process of a task creating from an uploaded backup + will be carried out. + + After initiating the backup upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent requests + to track the status of the task creation. + Once the task has been successfully created, the server will return the id of the newly created task. 
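A sketch of the backup restore flow described above, targeting /api/tasks/backup/; the same pattern applies to /api/projects/backup/. Host, credentials and file name are placeholders, and the task_file multipart field name is an assumption; the rq_id polling and the final 201 response with the new object id follow the description.

import time
import requests

session = requests.Session()
session.auth = ("user", "password")                  # placeholder credentials
url = "http://localhost:8080/api/tasks/backup/"      # placeholder host

# The first POST uploads the backup archive and creates the RQ job;
# the 202 response carries the rq_id of that job.
with open("task_backup.zip", "rb") as f:
    response = session.post(url, files={"task_file": f})   # assumed field name
response.raise_for_status()
rq_id = response.json()["rq_id"]

# Follow-up POSTs with only the rq_id query parameter track the job:
# 202 - still running, 201 - the task has been created.
while True:
    response = session.post(url, params={"rq_id": rq_id})
    if response.status_code != 202:
        break
    time.sleep(1)
response.raise_for_status()
print("created:", response.json())    # contains the id of the new task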
summary: Method recreates a task from an attached task backup file parameters: - in: header name: X-Organization schema: type: string + description: Organization unique slug - in: query name: cloud_storage_id schema: @@ -4366,6 +4527,11 @@ paths: schema: type: integer description: Organization identifier + - in: query + name: rq_id + schema: + type: string + description: rq id tags: - tasks requestBody: @@ -4387,6 +4553,10 @@ paths: '201': description: The task has been imported '202': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/RqId' description: Importing a backup file has been started /api/users: get: @@ -4673,6 +4843,7 @@ paths: name: X-Organization schema: type: string + description: Organization unique slug - in: query name: org schema: @@ -5431,6 +5602,20 @@ components: default: [] description: Paths to files from a file share mounted on the server, or from a cloud storage + remote_files: + type: array + items: + type: string + minLength: 1 + maxLength: 1024 + default: [] + description: Direct download URLs for files + use_zip_chunks: + type: boolean + default: false + description: | + When true, video chunks will be represented as zip archives with decoded video frames. + When false, video chunks are represented as video segments server_files_exclude: type: array items: @@ -5451,20 +5636,6 @@ components: Exclude all cloud storage files with prefix 'sub' from the content of manifest file: server_files = ['manifest.jsonl'], server_files_exclude = ['sub/'] - remote_files: - type: array - items: - type: string - minLength: 1 - maxLength: 1024 - default: [] - description: Direct download URLs for files - use_zip_chunks: - type: boolean - default: false - description: | - When true, video chunks will be represented as zip archives with decoded video frames. - When false, video chunks are represented as video segments cloud_storage_id: type: integer writeOnly: true @@ -5523,6 +5694,23 @@ components: ["file3.png"], # job #2 files ["file4.jpg", "file5.png", "file6.bmp"], # job #3 files ] + upload_file_order: + type: array + items: + type: string + minLength: 1 + maxLength: 1024 + writeOnly: true + description: | + Allows to specify file order for client_file uploads. + Only valid with the "predefined" sorting method selected. + + To state that the input files are sent in the correct order, + pass an empty list. + + If you want to send files in an arbitrary order + and reorder them afterwards on the server, + pass the list of file names in the required order. 
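A sketch of the multi-request data upload with a custom file order, as described for the /api/tasks/{id}/data/ operation above. The Upload-Start/Upload-Multiple/Upload-Finish phases, the predefined sorting method and the upload_file_order field follow the schema; the header values, the JSON bodies and the indexed client_files[...] multipart naming are assumptions made for illustration.

import requests

session = requests.Session()
session.auth = ("user", "password")                   # placeholder credentials
url = "http://localhost:8080/api/tasks/42/data/"      # placeholder task id

# Phase 1: announce the upload.
response = session.post(url, headers={"Upload-Start": "true"},
                        json={"image_quality": 70})
response.raise_for_status()

# Phase 2: send the files; they may arrive in any order and may be split
# across several Upload-Multiple requests.
with open("dogs/dog2.jpg", "rb") as f1, open("cats/cat_1.jpg", "rb") as f2:
    response = session.post(
        url,
        headers={"Upload-Multiple": "true"},
        files={"client_files[0]": f1, "client_files[1]": f2},   # assumed naming
    )
response.raise_for_status()

# Phase 3: finish the upload and declare the intended order explicitly.
# upload_file_order only takes effect with the "predefined" sorting method;
# an empty list would mean the files were already sent in the right order.
response = session.post(
    url,
    headers={"Upload-Finish": "true"},
    json={
        "image_quality": 70,
        "sorting_method": "predefined",
        "upload_file_order": ["cats/cat_1.jpg", "dogs/dog2.jpg"],
    },
)
response.raise_for_status()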
required: - image_quality DatasetFileRequest: @@ -7327,6 +7515,13 @@ components: * `supervisor` - Supervisor * `maintainer` - Maintainer * `owner` - Owner + RqId: + type: object + properties: + rq_id: + type: string + required: + - rq_id RqStatus: type: object properties: diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 8c24978a9220..828b396631d2 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -21,6 +21,7 @@ import shutil import subprocess import sys +from datetime import timedelta from distutils.util import strtobool from enum import Enum @@ -250,8 +251,6 @@ def add_ssh_keys(): }, ] -WSGI_APPLICATION = 'cvat.wsgi.application' - # IAM settings IAM_TYPE = 'BASIC' IAM_BASE_EXCEPTION = None # a class which will be used by IAM to report errors @@ -294,6 +293,7 @@ class CVAT_QUEUES(Enum): AUTO_ANNOTATION = 'annotation' WEBHOOKS = 'webhooks' NOTIFICATIONS = 'notifications' + CLEANING = 'cleaning' RQ_QUEUES = { CVAT_QUEUES.IMPORT_DATA.value: { @@ -326,6 +326,12 @@ class CVAT_QUEUES(Enum): 'DB': 0, 'DEFAULT_TIMEOUT': '1h' }, + CVAT_QUEUES.CLEANING.value: { + 'HOST': 'localhost', + 'PORT': 6379, + 'DB': 0, + 'DEFAULT_TIMEOUT': '1h' + }, } NUCLIO = { @@ -346,7 +352,6 @@ class CVAT_QUEUES(Enum): 'cvat.apps.events.handlers.handle_rq_exception', ] - # JavaScript and CSS compression # https://django-compressor.readthedocs.io @@ -667,3 +672,7 @@ class CVAT_QUEUES(Enum): } BUCKET_CONTENT_MAX_PAGE_SIZE = 500 + +IMPORT_CACHE_FAILED_TTL = timedelta(days=90) +IMPORT_CACHE_SUCCESS_TTL = timedelta(hours=1) +IMPORT_CACHE_CLEAN_DELAY = timedelta(hours=2) diff --git a/cvat/settings/production.py b/cvat/settings/production.py index d3c99334be20..7593b08a69f5 100644 --- a/cvat/settings/production.py +++ b/cvat/settings/production.py @@ -6,10 +6,6 @@ DEBUG = False -INSTALLED_APPS += [ - 'mod_wsgi.server', -] - NUCLIO['HOST'] = os.getenv('CVAT_NUCLIO_HOST', 'nuclio') for key in RQ_QUEUES: RQ_QUEUES[key]['HOST'] = os.getenv('CVAT_REDIS_HOST', 'cvat_redis') @@ -17,4 +13,5 @@ # Django-sendfile: # https://github.com/moggers87/django-sendfile2 -SENDFILE_BACKEND = 'django_sendfile.backends.xsendfile' +SENDFILE_BACKEND = 'django_sendfile.backends.nginx' +SENDFILE_URL = '/' diff --git a/cvat/settings/testing_rest.py b/cvat/settings/testing_rest.py index 36bf80dc0a2b..5fb329732f29 100644 --- a/cvat/settings/testing_rest.py +++ b/cvat/settings/testing_rest.py @@ -10,3 +10,5 @@ PASSWORD_HASHERS = [ "django.contrib.auth.hashers.MD5PasswordHasher", ] + +IMPORT_CACHE_CLEAN_DELAY = timedelta(seconds=30) diff --git a/cvat/wsgi.py b/cvat/wsgi.py deleted file mode 100644 index 54dd33aa260c..000000000000 --- a/cvat/wsgi.py +++ /dev/null @@ -1,43 +0,0 @@ - -# Copyright (C) 2018-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation -# -# SPDX-License-Identifier: MIT - -""" -WSGI config for CVAT project. - -It exposes the WSGI callable as a module-level variable named ``application``. 
- -For more information on this file, see -https://docs.djangoproject.com/en/2.0/howto/deployment/wsgi/ -""" - -import os -from django.core.wsgi import get_wsgi_application - -import cvat.utils.remote_debugger as debug - - -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cvat.settings.{}" \ - .format(os.environ.get("DJANGO_CONFIGURATION", "development"))) - -application = get_wsgi_application() - - -if debug.is_debugging_enabled(): - class DebuggerApp: - """ - Support for VS code debugger - """ - - def __init__(self, obj): - self.__object = obj - self.__debugger = debug.RemoteDebugger() - - def __call__(self, *args, **kwargs): - self.__debugger.attach_current_thread() - - return self.__object(*args, **kwargs) - - application = DebuggerApp(application) diff --git a/docker-compose.yml b/docker-compose.yml index d91a72532670..47df9c9cedc2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,7 +25,7 @@ services: cvat_server: container_name: cvat_server - image: cvat/server:${CVAT_VERSION:-v2.4.5} + image: cvat/server:${CVAT_VERSION:-v2.4.6} restart: always depends_on: - cvat_redis @@ -39,13 +39,13 @@ services: ADAPTIVE_AUTO_ANNOTATION: 'false' IAM_OPA_BUNDLE: '1' no_proxy: clickhouse,grafana,vector,nuclio,opa,${no_proxy:-} - NUMPROCS: 1 + NUMPROCS: 2 DJANGO_LOG_SERVER_HOST: vector DJANGO_LOG_SERVER_PORT: 80 CLICKHOUSE_HOST: clickhouse CVAT_ANALYTICS: 1 CVAT_BASE_URL: - command: -c supervisord/server.conf + entrypoint: /home/django/backend_entrypoint.sh labels: - traefik.enable=true - traefik.http.services.cvat.loadbalancer.server.port=8080 @@ -63,7 +63,7 @@ services: cvat_utils: container_name: cvat_utils - image: cvat/server:${CVAT_VERSION:-v2.4.5} + image: cvat/server:${CVAT_VERSION:-v2.4.6} restart: always depends_on: - cvat_redis @@ -77,6 +77,7 @@ services: DJANGO_LOG_SERVER_HOST: vector DJANGO_LOG_SERVER_PORT: 80 no_proxy: clickhouse,grafana,vector,nuclio,opa,${no_proxy:-} + NUMPROCS: 1 command: -c supervisord/utils.conf volumes: - cvat_data:/home/django/data @@ -87,7 +88,7 @@ services: cvat_worker_import: container_name: cvat_worker_import - image: cvat/server:${CVAT_VERSION:-v2.4.5} + image: cvat/server:${CVAT_VERSION:-v2.4.6} restart: always depends_on: - cvat_redis @@ -109,7 +110,7 @@ services: cvat_worker_export: container_name: cvat_worker_export - image: cvat/server:${CVAT_VERSION:-v2.4.5} + image: cvat/server:${CVAT_VERSION:-v2.4.6} restart: always depends_on: - cvat_redis @@ -132,7 +133,7 @@ services: cvat_worker_annotation: container_name: cvat_worker_annotation - image: cvat/server:${CVAT_VERSION:-v2.4.5} + image: cvat/server:${CVAT_VERSION:-v2.4.6} restart: always depends_on: - cvat_redis @@ -155,7 +156,7 @@ services: cvat_worker_webhooks: container_name: cvat_worker_webhooks - image: cvat/server:${CVAT_VERSION:-v2.4.5} + image: cvat/server:${CVAT_VERSION:-v2.4.6} restart: always depends_on: - cvat_redis @@ -178,7 +179,7 @@ services: cvat_ui: container_name: cvat_ui - image: cvat/ui:${CVAT_VERSION:-v2.4.5} + image: cvat/ui:${CVAT_VERSION:-v2.4.6} restart: always depends_on: - cvat_server diff --git a/helm-chart/Chart.yaml b/helm-chart/Chart.yaml index 91c7a682e46a..a667c976f3f0 100644 --- a/helm-chart/Chart.yaml +++ b/helm-chart/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. 
# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.7.2 +version: 0.7.3 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/helm-chart/templates/cvat_backend/server/deployment.yml b/helm-chart/templates/cvat_backend/server/deployment.yml index d8add5a5e60a..cc0b5c396a08 100644 --- a/helm-chart/templates/cvat_backend/server/deployment.yml +++ b/helm-chart/templates/cvat_backend/server/deployment.yml @@ -51,7 +51,7 @@ spec: resources: {{- toYaml . | nindent 12 }} {{- end }} - args: ["-c", "supervisord/server.conf"] + command: ["/home/django/backend_entrypoint.sh"] env: - name: ALLOWED_HOSTS value: {{ .Values.cvat.backend.server.envs.ALLOWED_HOSTS | squote}} diff --git a/helm-chart/values.yaml b/helm-chart/values.yaml index 38c3ac094a72..ea51644cbab7 100644 --- a/helm-chart/values.yaml +++ b/helm-chart/values.yaml @@ -75,7 +75,7 @@ cvat: additionalVolumeMounts: [] replicas: 1 image: cvat/server - tag: v2.4.5 + tag: v2.4.6 imagePullPolicy: Always permissionFix: enabled: true @@ -95,7 +95,7 @@ cvat: frontend: replicas: 1 image: cvat/ui - tag: v2.4.5 + tag: v2.4.6 imagePullPolicy: Always labels: {} # test: test diff --git a/mod_wsgi.conf b/mod_wsgi.conf deleted file mode 100644 index 09e615f81404..000000000000 --- a/mod_wsgi.conf +++ /dev/null @@ -1,16 +0,0 @@ -LoadModule xsendfile_module /usr/lib/apache2/modules/mod_xsendfile.so -XSendFile On -XSendFilePath ${HOME}/data/ -XSendFilePath ${HOME}/static/ - -# The presence of an Apache output filter (mod_xsendfile) causes mod_wsgi -# to internally convert HEAD requests to GET before passing them to the -# application, for reasons explained here: -# . -# However, we need HEAD requests passed through as-is, because the TUS -# protocol requires them. It should be safe to disable this functionality in -# our case, because mod_xsendfile does not examine the response body (it -# either passes it through or discards it entirely based on the headers), -# so it shouldn't matter whether the application omits the body in response -# to a HEAD request. -WSGIMapHEADToGET Off diff --git a/serverless/deploy_cpu.sh b/serverless/deploy_cpu.sh index 8fc1dacc453c..03d6f17bad67 100755 --- a/serverless/deploy_cpu.sh +++ b/serverless/deploy_cpu.sh @@ -24,7 +24,8 @@ do fi echo "Deploying $func_rel_path function..." - nuctl deploy --project-name cvat --path "$func_root" --platform local + nuctl deploy --project-name cvat --path "$func_root" \ + --file "$func_config" --platform local done nuctl get function --platform local diff --git a/site/content/en/docs/administration/advanced/analytics.md b/site/content/en/docs/administration/advanced/analytics.md index cc468582e931..4191e42677b4 100644 --- a/site/content/en/docs/administration/advanced/analytics.md +++ b/site/content/en/docs/administration/advanced/analytics.md @@ -1,24 +1,400 @@ - - --- - -title: 'Installation Analytics' -linkTitle: 'Installation Analytics' +title: 'CVAT Analytics and monitoring' +linkTitle: 'CVAT Analytics and monitoring' weight: 20 -description: 'Instructions for deployment and customization of Analytics. This section on [GitHub](https://github.com/cvat-ai/cvat/tree/develop/components/analytics).' - +description: 'Instructions for deployment and customization of analytics and monitoring.' 
--- - +CVAT Analytics suite of tools is designed to track and understand users' behavior, system performance, +and for identifying potential issues in your application. + +You can also visualize user activity through Grafana, and aggregate user +working time by the jobs. + +Gathered logs can be additionally filtered for efficient debugging. + +By using analytics, you'll gain valuable insights to optimize your system +and enhance user satisfaction. + +CVAT analytics are available from the top menu. + +![CVAT Analytics](/images/analytics_menu.jpg) + +> Note: CVAT analytics and monitoring are available only for on-prem solution. + +See: + +- [High-level architecture](#high-level-architecture) +- [CVAT Analytics](#cvat-analytics) + - [Ports settings](#ports-settings) + - [Events log structure](#events-log-structure) + - [Types of supported events](#types-of-supported-events) + - [Request `id` for tracking](#request-id-for-tracking) + - [Fetching event data as CSV from the `/api/events` endpoint](#fetching-event-data-as-csv-from-the-apievents-endpoint) +- [Dashboards](#dashboards) + - [Dashboard: All Events](#dashboard-all-events) + - [Dashboard: Management](#dashboard-management) + - [Dashboard: Monitoring](#dashboard-monitoring) + - [Dashboards setup](#dashboards-setup) + +## High-level architecture + +The CVAT analytics is based on Vector, ClickHouse, and Grafana. + +![CVAT Analytics](/images/analytic_architecture.jpg) + +## CVAT Analytics + +CVAT and its analytics module can be set up locally, for +self-hosted solution analytics are enabled by default. -![](/images/image097.jpg) +> For detailed instructions for CVAT installation, see [Installation Guide](https://opencv.github.io/cvat/docs/administration/basics/installation/) +> or refer to the [CVAT Course](https://www.youtube.com/playlist?list=PL0to7Ng4PuuYQT4eXlHb_oIlq_RPeuasN) +> for installation videos. -It is possible to proxy annotation events from the UI to the Clickhouse database and use Grafana for visualization. -This feature is enabled by default and all required containers will be launched when starting CVAT with: +All analytics-related features will be launched when you +start CVAT containers with the following command: ```shell docker compose up -d ``` -The previous solution based on ELK stack is currently deprecated and will no longer be supported. +### Ports settings + +If you cannot access analytics on +development environnement, +see [Analytics Ports](/docs/contributing/development-environment/#cvat-analytics-ports) + +### Events log structure + +[Relational database](https://github.com/opencv/cvat/blob/develop/components/analytics/clickhouse/init.sh) +schema with the following fields: + + + +| Field | Description | +| ---------- | ------------------------------------------------------------------------------------------------ | +| scope | Scope of the event (e.g., `zoomin:image`, `add:annotations`, `delete:image`, `update:assignee`). | +| obj_name | Object name or None (e.g., task, job, cloudstorage, model, organization). | +| obj_id | Object identifier as in DB or None. | +| obj_val | Value for the event as string or None (e.g., frame number, number of added annotations). | +| source | Who generates the log event (e.g., server, ui). | +| timestamp | Local event time (in general for UI and server, the time is different). | +| count | How many times in the row it occurs. | +| duration | How much time does it take (it can be 0 for events without duration). | +| project_id | Project ID or None. 
| +| task_id | Task ID or None. | +| job_id | Job ID or None. | +| user_id | User ID or None. | +| user_name | User name or None. | +| user_email | User email or None. | +| org_id | Organization ID or None. | +| org_slug | Organization slug or None. | +| payload | JSON payload or None. Extra fields can be added to the JSON blob. | + + + +### Types of supported events + +Supported events change the scope of information displayed in Grafana. + +![Supported Events](/images/supported_events.jpg) + + + +Server events: + +- `create:project`, `update:project`, `delete:project` + +- `create:task`, `update:task`, `delete:task` + +- `create:job`, `update:job`, `delete:job` + +- `create:organization`, `update:organization`, `delete:organization` + +- `create:user`, `update:user`, `delete:user` + +- `create:cloudstorage`, `update:cloudstorage`, `delete:cloudstorage` + +- `create:issue`, `update:issue`, `delete:issue` + +- `create:comment`, `update:comment`, `delete:comment` + +- `create:annotations`, `update:annotations`, `delete:annotations` + +- `create:label`, `update:label`, `delete:label` + +Client events: + +- `load:cvat` + +- `load:job`, `save:job`, `restore:job` +- `upload:annotations` +- `send:exception` +- `send:task_info` + +- `draw:object`, `paste:object`, `copy:object`, `propagate:object`, `drag:object`, `resize:object`, `delete:object`, `lock:object`, `merge:objects` +- `change:attribute` +- `change:label` + +- `change:frame` +- `move:image`, `zoom:image`, `fit:image`, `rotate:image` + +- `action:undo`, `action:redo` + +- `press:shortcut` +- `send:debug_info` + +- `click:element` + + + +### Request `id` for tracking + +Note, that every response to an API request made to the +the server includes a header named `X-Request-Id`, +for example: `X-Request-Id: 6a2b7102-c4b9-4d57-8754-5658132ba37d`. + +This identifier is also recorded in all server events that +occur as a result of the respective request. + +For example, when an operation to create a task is performed, +other related entities such as labels and attributes are +generated on the server in addition to the **Task** object. + +All events associated with this operation will have the same `request_id` in +the payload field. + +### Fetching event data as CSV from the `/api/events` endpoint + + + +The `/api/events` endpoint allows the fetching of +event data with filtering parameters such as +`org_id`, `project_id`, `task_id`, `job_id`, and `user_id`. + +For more details, +see [Swagger API Documentation](https://app.cvat.ai/api/swagger/#/events/events_list). 
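
For scripting, the whole export flow can also be driven from Python. Below is a minimal sketch using the `requests` package; the base URL, credentials, and job id are placeholders, and the individual steps (with their equivalent `curl` commands) are described right after it.

```python
import time

import requests

BASE_URL = "https://app.cvat.ai"  # replace with your CVAT instance URL
AUTH = ("user", "pass")           # basic auth credentials
params = {"job_id": 123}

# 1. Request the export and remember the returned query id
query_id = requests.get(f"{BASE_URL}/api/events", params=params, auth=AUTH).json()["query_id"]

# 2. Poll until the export is ready (the server answers 201 Created)
params["query_id"] = query_id
while requests.head(f"{BASE_URL}/api/events", params=params, auth=AUTH).status_code != 201:
    time.sleep(5)

# 3. Download the resulting CSV file
params["action"] = "download"
with open("/tmp/events.csv", "wb") as events_csv:
    events_csv.write(requests.get(f"{BASE_URL}/api/events", params=params, auth=AUTH).content)
```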
+ +For example, to fetch all events associated with a specific job, +the following `curl` command can be used: + +```bash +curl --user 'user:pass' https://app.cvat.ai/api/events?job_id=123 +``` + +In the response, you will receive a query ID: + +```json +{ "query_id": "150cac1f-09f1-4d73-b6a5-5f47aa5d0031" } +``` + +As this process may take some time to complete, +the status of the request can be checked by +adding the query parameter `query_id` to the request: + +```bash +curl -I --user 'user:pass' https://app.cvat.ai/api/events?job_id=123&query_id=150cac1f-09f1-4d73-b6a5-5f47aa5d0031 +``` + +Upon successful creation, the server will return a `201 Created` status: + +``` +HTTP/2 201 +allow: GET, POST, HEAD, OPTIONS +date: Tue, 16 May 2023 13:38:42 GMT +referrer-policy: same-origin +server: Apache +vary: Accept,Origin,Cookie +x-content-type-options: nosniff +x-frame-options: DENY +x-request-id: 4631f5fa-a4f0-42a8-b77b-7426fc298a85 +``` + +The CSV file can be downloaded by +adding the `action=download` query parameter to the request: + +```bash +curl --user 'user:pass' https://app.cvat.ai/api/events?job_id=123&query_id=150cac1f-09f1-4d73-b6a5-5f47aa5d0031&action=download > /tmp/events.csv +``` + +This will download and save the file to `/tmp/events.csv` +on your local machine. + + + +## Dashboards + +By default, three dashboards are available in CVAT. + +To access them, click **General**, you will be forwarded to the +page with available dashboards. + +![List of dashboards](/images/dashboard_00.jpg) + + + +| Dashboard | Description | +| -------------- | --------------------------------------------------------------------------------------- | +| **All Events** | Dashboard that shows all event logs, timestamps, and source. | +| **Management** | Dashboard with information about user activities such as working time by job and so on. | +| **Monitoring** | Dashboard showing server logs, including errors. | + + + +### Dashboard: All Events + +The dashboard shows all events, their timestamps, and their source. + +![Dashboard: All Events](/images/dashboard_01.jpg) + + + +| Element | Description | +| -------------------- | -------------------------------------------------------------------------------------------------------------------------- | +| **Filters** | Can be used as drop-down lists or search fields. Click on the arrow to activate. | +| **Overall activity** | Graph that shows the overall activity by the selected filters. | +| **Scope** | Users' activity, see [Types of supported events](#types-of-supported-events). | +| **obj_name** | Object or item related to the **Scope**. | +| **obj_id** | Object's id. Might be empty. | +| **source** | Source of the event, can be `client` or `server`. | +| **timestamp** | Time when the event happened. | +| **count** | Common field for all events, not null where it makes sense, for example, the
number of saved objects in an annotation. | +| **duration** | Duration in milliseconds. | +| **project_id** | Id of the project. | +| **project_id** | Id of the project. | +| **task_id** | ID of the task. | +| **job_id** | ID of the job. | + + + +There are two fields with statistics at the bottom of the dashboard, +about browser and OS users use. + +Every column name can be used like a filter. + +If you want to inspect the value, hover over it and click +on the eye icon. + +### Dashboard: Management + +The dashboard shows user activity. + +![Dashboard: Management](/images/dashboard_02.jpg) + + + +| Element | Description | +| -------------------- | ------------------------------------------------------------------------------------------------------------------------------- | +| **Filters** | Can be used as drop-down lists or search fields. Click on the arrow to activate. | +| **User activity** | Graph that shows when the user was active (data and time), click on the user id below, to see the graph for the dedicated user. | +| **Overall activity** | Graph shows common activity for all users. | +| **User** | User ID. | +| **Project** | Project ID. Might be empty. | +| **Task** | Task ID. Might be empty. | +| **Job** | Job ID. Might be empty. | +| **Working time(h)** | Time spent on task in hours. | +| **Activity** | Number of events for each user. | + + + +Every column name can be used like a filter. + +If you want to inspect the value, hover over it and click +on the eye icon. + +### Dashboard: Monitoring + +The dashboard shows server logs, helps handle errors, and shows user activity. + +![Dashboard: Monitoring](/images/dashboard_03.jpg) + + + +| Element | Description | +| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Filters** | Can be used as drop-down lists or search fields. Click on the arrow to activate. | +| **Active users (now)** | Number of active users on an instance. | +| **Overall activity** | Graph that shows the number of active users. | +| **Exceptions** | Graph that shows the number of errors that happened in the instance. | +| **timestamp** | Time when the error happened. | +| **user_id** | User ID. | +| **user_name** | User nickname. | +| **project_id** | Id of the project. Might be empty. | +| **task_id** | Task ID. Might be empty. | +| **job_id** | Job ID. Might be empty. | +| **error** | Error description | +| **stack** | Error description | +| **payload** | Error description | +| **stack** | Stack trace, which is a report of the active stack frames at a certain point in time during the execution. This information is typically used for debugging purposes to locate where an issue occurred. | +| **payload** | JSON that describes the entire object, which contains several properties. This data in the payload is related to an event that was created as a result of a failed API request. The payload contains information about this event. | + + + +Every column name can be used like a filter. + +If you want to inspect the value, hover over it and click +on the eye icon. + +### Dashboards setup + +You can adjust the dashboards. To do this, click on the +graph or table name and from the drop-down menu select **Edit**. + +Adjust the query in the editor. 
+ +![Dashboard: look and feel](/images/dashboard_04.jpg) + +Example of query: + +```sql +SELECT + time, + uniqExact(user_id) Users +FROM +( + SELECT + user_id, + toStartOfInterval(timestamp, INTERVAL 15 minute) as time + FROM cvat.events + WHERE + user_id IS NOT NULL + GROUP BY + user_id, + time + ORDER BY time ASC WITH FILL STEP toIntervalMinute(15) +) +GROUP BY time +ORDER BY time +``` + +> **Note**, that by default the updated configuration will not be saved +> and will be reset to the default parameters after you restart the +> container. + +To save the updated configuration, do the following: + +1. **Update Configuration**: Start by making your desired changes in the query. + +2. **Apply Changes**: Once you've made your changes, + click the **Apply** button to ensure the changes are implemented. + + ![Apply changes](/images/apply.jpg) + +3. **Save Configuration**: To save your applied changes, on the top of the dashboard, + click the **Save** button. + + ![Apply changes](/images/save_results.jpg) + +4. **Replace Configuration File**: After saving, replace the existing + Grafana dashboard configuration file is located at + `components/analytics/grafana/dashboards` with the new JSON configuration file. + + ![Apply changes](/images/save_json.jpg) + +5. **Restart Grafana Service**: To ensure, that all changes take effect, + restart the Grafana service. If you're using Docker Compose, + execute the following command: `docker compose restart cvat_grafana`. +For more information, +see [Grafana Dashboards](https://grafana.com/docs/grafana/latest/dashboards/). diff --git a/site/content/en/docs/administration/advanced/webhooks.md b/site/content/en/docs/administration/advanced/webhooks.md index ade7a521b361..a29081fb2069 100644 --- a/site/content/en/docs/administration/advanced/webhooks.md +++ b/site/content/en/docs/administration/advanced/webhooks.md @@ -1,46 +1,115 @@ --- title: 'Webhooks' linkTitle: 'Webhooks' -description: 'Instructions for working with CVAT Webhooks' +description: 'CVAT Webhooks: set up and use' weight: 80 --- +Webhooks are user-defined HTTP callbacks that are triggered by specific events. +When an event that triggers a webhook occurs, CVAT makes an HTTP request +to the URL configured for the webhook. +The request will include a payload with information about the event. + +CVAT, webhooks can be triggered by a variety of events, +such as the creation, deletion, or modification of tasks, +jobs, and so on. +This makes it easy to set up automated processes +that respond to changes made in CVAT. + +For example, you can set up webhooks to alert you when a job's assignee is changed or when +a job/task's status is updated, for instance, when a job is completed and ready for review +or has been reviewed. New task creation can also trigger notifications. + +These capabilities allow you to keep track of progress and changes in your CVAT workflow instantly. + +In CVAT you can create a webhook for a project or organization. +You can use CVAT GUI or direct API calls. 
+ +See: + +- [Create Webhook](#create-webhook) + - [For project](#for-project) + - [For organization](#for-organization) + - [Webhooks forms](#webhooks-forms) + - [List of events](#list-of-events) +- [Payloads](#payloads) + - [Create event](#create-event) + - [Update event](#update-event) + - [Delete event](#delete-event) +- [Webhook secret](#webhook-secret) +- [Ping Webhook](#ping-webhook) +- [Webhooks with API calls](#webhooks-with-api-calls) +- [Example of setup and use](#example-of-setup-and-use) + ## Create Webhook -In CVAT you can create webhook for project or for organization. -For creation, you can use our user interface or direct API calls. +### For project + +To create a webhook for **Project**, do the following: + +1. [Create a Project](/docs/manual/advanced/projects/). +2. Go to the **Projects** and click on the project's widget. +3. In the top right corner, click **Actions** > **Setup Webhooks**. +4. In the top right corner click **+** + + ![Create Project Webhook](/images/create_project_webhook.gif) + +5. Fill in the **[Setup webhook](#webhooks-forms)** form and click **Submit**. + +### For organization + +To create a webhook for **Organization**, do the following: + +1. [Create Organization](/docs/manual/advanced/organization/) +2. Go to the **Organization** > **Settings** > **Actions** > **Setup Webhooks**. +3. In the top right corner click **+** + + ![](/images/create_organization_webhook.gif) -In order to create webhook via an API call, see the [swagger documentation](https://app.cvat.ai/api/docs). -And also see examples of creating webhooks in our [REST API tests](https://github.com/opencv/cvat/blob/develop/tests/python/rest_api/test_webhooks.py). +4. Fill in the **[Setup webhook](#webhooks-forms)** form and click **Submit**. -### Create Webhook for project +### Webhooks forms -To create webhook for CVAT project, follow the steps: +The **Setup a webhook** forms look like the following. -`Project -> Actions -> Setup Webhooks` +![Create Project And Org Webhook Forms ](/images/webhook_form_project_org.jpg) -![](/images/create_project_webhook.gif) +Forms have the following fields: -### Create Webhook for organization + -To create webhook for CVAT organization, follow the steps: +| Field | Description | +| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Target URL | The URL where the event data will be sent. | +| Description | Provides a brief summary of the webhook's purpose. | +| Project | A drop-down list that lets you select from available projects. | +| Content type | Defines the data type for the payload in the webhook request via the HTTP Content-Type field. | +| Secret | A unique key for verifying the webhook's origin, ensuring it's genuinely from CVAT.
For more information, see [Webhook secret](#webhook-secret) | +| Enable SSL | A checkbox for enabling or disabling [SSL verification](https://en.wikipedia.org/wiki/Public_key_certificate). | +| Active | Uncheck this box if you want to stop the delivery of specific webhook payloads. | +| Send everything | Check this box to send all event types through the webhook. | +| Specify individual events | Choose this option to send only certain event types.
Refer to the [List of available events](#list-of-available-events) for more information on event types. | -`Organization -> Settings -> Actions -> Setup Webhooks` + -![](/images/create_organization_webhook.gif) +### List of events -## List of available events +The following events are available for webhook alerts. -| Resource | Create | Update | Delete | -| :---: | :----: | :----: | :----: | -| Organization | | ✅ | | -| Membership | | ✅ | ✅ | -| Invitation | ✅ | | ✅ | -| Project | ✅ | ✅ | ✅ | -| Task | ✅ | ✅ | ✅ | -| Job | | ✅ | | -| Issue | ✅ | ✅ | ✅ | -| Comment | ✅ | ✅ | ✅ | + + +| Resource | Create | Update | Delete | Description | +| ------------ | ------ | ------ | ------ | ----------------------------------------------------------------------------------- | +| Organization | | ✅ | | Alerts for changes made to an Organization. | +| Membership | | ✅ | ✅ | Alerts when a member is added to or removed from an organization. | +| Invitation | ✅ | | ✅ | Alerts when an invitation to an Organization is issued or revoked. | +| Project | ✅ | ✅ | ✅ | Alerts for any actions taken within a project. | +| Task | ✅ | ✅ | ✅ | Alerts for actions related to a task, such as status changes, assignments, etc. | +| Job | | ✅ | | Alerts for any updates made to a job. | +| Issue | ✅ | ✅ | ✅ | Alerts for any activities involving issues. | +| Comment | ✅ | ✅ | ✅ | Alerts for actions involving comments, such as creation, deletion, or modification. | + + ## Payloads @@ -48,17 +117,22 @@ To create webhook for CVAT organization, follow the steps: Webhook payload object for `create:` events: -| Key | Type | Description | -| :---: | :----: | :---- | -| `event` | `string` | Name of event that triggered webhook with pattern `create:` | -| `` | `object` | Full information about created resource. See the swagger docs for each separate resource | -| `webhook_id` | `integer` | Identifier of webhook that sent payload | -| `sender` | `object` | Information about user that triggered webhook | + + +| Key | Type | Description | +| ------------ | --------- | --------------------------------------------------------------------------------------------------------------------------------------- | +| `event` | `string` | Identifies the event that triggered the webhook, following the `create:` pattern. | +| `` | `object` | Complete information about the created resource. Refer to the [Swagger](#webhooks-with-api-calls) docs for individual resource details. | +| `webhook_id` | `integer` | The identifier for the webhook that sends the payload. | +| `sender` | `object` | Details about the user that triggered the webhook. | + + + +An example of payload for the `create:task` event: -Here is example of payload for `create:task` event: {{< scroll-code lang="json" >}} { - "event": "create:task", + "event": "create:task", "task": { "url": "", "id": 15, @@ -116,18 +190,23 @@ Here is example of payload for `create:task` event: } {{< /scroll-code >}} - ### Update event Webhook payload object for `update:` events: -| Key | Type | Description | -| :---: | :----: | :---- | -| `event` | `string` | Name of event that triggered webhook with pattern `update:` | -| `` | `object` | Full information about updated resource. 
See the swagger docs for each separate resource | -| `before_update` | `object` | Keys of `` that was updated with theirs old values | -| `webhook_id` | `integer` | Identifier of webhook that sent payload | -| `sender` | `object` | Information about user that triggered webhook | + + +| Key | Type | Description | +| --------------- | --------- | ---------------------------------------------------------------------------------------------------- | +| `event` | `string` | Identifies the event that triggered the webhook, following the `update:` pattern. | +| `` | `object` | Provides complete information about the updated resource. See the Swagger docs for resource details. | +| `before_update` | `object` | Contains keys of `` that were updated, along with their old values. | +| `webhook_id` | `integer` | The identifier for the webhook that dispatched the payload. | +| `sender` | `object` | Details about the user that triggered the webhook. | + + + +An example of `update:` event: {{< scroll-code lang="json" >}} { @@ -212,18 +291,24 @@ Webhook payload object for `update:` events: } } {{< /scroll-code >}} + ### Delete event Webhook payload object for `delete:` events: -| Key | Type | Description | -| :---: | :----: | :---- | -| `event` | `string` | Name of event that triggered webhook with pattern `delete:` | -| `` | `object` | Full information about deleted resource. See the swagger docs for each separate resource | -| `webhook_id` | `integer` | Identifier of webhook that sent payload | -| `sender` | `object` | Information about user that triggered webhook | + + +| Key | Type | Description | +| ------------ | --------- | ---------------------------------------------------------------------------------------------------- | +| `event` | `string` | Identifies the event that triggered the webhook, following the `delete:` pattern. | +| `` | `object` | Provides complete information about the deleted resource. See the Swagger docs for resource details. | +| `webhook_id` | `integer` | The identifier for the webhook that dispatched the payload. | +| `sender` | `object` | Details about the user that triggered the webhook. | + + + +Here is an example of the payload for the `delete:task` event: -Here is example of payload for `delete:task` event: {{< scroll-code lang="json" >}} { "event": "delete:task", @@ -286,16 +371,17 @@ Here is example of payload for `delete:task` event: ## Webhook secret -To be ensure that webhooks come from CVAT you can specify `secret` when creating a webhook. +To validate that the webhook requests originate from CVAT, include a `secret` during the webhook creation process. -If you specified `secret` value for webhook, then CVAT will sent webhook with `X-Signature-256` in -request header. +When a `secret` is provided for the webhook, CVAT includes an `X-Signature-256` in the request header of the webhook. -CVAT encode request body for webhook using SHA256 hash function and put the result into the header. +CVAT uses the SHA256 hash function to encode the request +body for the webhook and places the resulting hash into the header. -Webhook receiver can check that request came from CVAT by comparison received value of `X-Signature-256` with expected. +The webhook recipient can verify the source of the request +by comparing the received `X-Signature-256` value with the expected value. 
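
As a sketch of how the expected value can be computed on the receiver side, assuming that `X-Signature-256` carries an HMAC-SHA256 hex digest of the raw request body keyed with the webhook secret (the function and variable names below are illustrative):

```python
import hashlib
import hmac


def expected_signature(secret: str, raw_body: bytes) -> str:
    # Assumption: the signature is an HMAC-SHA256 hex digest of the raw
    # request body, keyed with the secret configured for the webhook.
    return hmac.new(secret.encode("utf-8"), raw_body, hashlib.sha256).hexdigest()


# Compare against the received header with a constant-time comparison, e.g.:
# hmac.compare_digest(expected_signature("mykey", body), received_x_signature_256)
```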
-Example of header value for empty request body and `secret = mykey`: +Here's an example of a header value for a request with an empty body and `secret = mykey`: ``` X-Signature-256: e1b24265bf2e0b20c81837993b4f1415f7b68c503114d100a40601eca6a2745f @@ -327,22 +413,31 @@ def webhook(): ## Ping Webhook -To check that webhook configured well and CVAT can connect with target URL you can use `ping` webhook. +To confirm the proper configuration of your webhook and ensure that CVAT can establish +a connection with the target URL, use the **Ping** webhook feature. + +![Ping Webhook ](/images/ping_webhook.jpg) -After pressing `Ping` bottom on UI (or sending `POST /webhooks/{id}/ping` request) CVAT will sent webhook -to the target url with general information about webhook. +1. Click the **Ping** button in the user interface (or send a `POST /webhooks/{id}/ping` request through API). +2. CVAT will send a webhook alert to the specified target URL with basic information about the webhook. Ping webhook payload: -| Key | Type | Description | -| :---: | :----: | :---- | -| `event` | `string` | Value always equals `ping` | -| `webhook` | `object` | Full information about webhook. See the full description of webhook`s fields in swagger docs | -| `sender` | `object` | Information about user that called `ping` webhook | + + +| Key | Type | Description | +| --------- | -------- | -------------------------------------------------------------------------------------------------- | +| `event` | `string` | The value is always `ping`. | +| `webhook` | `object` | Complete information about the webhook. See the Swagger docs for a detailed description of fields. | +| `sender` | `object` | Information about the user who initiated the `ping` on the webhook. | + + + +Here is an example of a payload for the `ping` event: {{< scroll-code lang="json" >}} { - "event": "ping", + "event": "ping", "webhook": { "id": 7, "url": "", @@ -388,3 +483,22 @@ Ping webhook payload: } } {{< /scroll-code >}} + +## Webhooks with API calls + +To create webhook via an API call, +see [Swagger documentation](https://app.cvat.ai/api/docs). + +For examples, +see [REST API tests](https://github.com/opencv/cvat/blob/develop/tests/python/rest_api/test_webhooks.py). + +## Example of setup and use + + +This video demonstrates setting up email alerts for a project using Zapier and Gmail. + + + + + + diff --git a/site/content/en/docs/api_sdk/sdk/lowlevel-api.md b/site/content/en/docs/api_sdk/sdk/lowlevel-api.md index c7460ca163c1..e1629bb00ee9 100644 --- a/site/content/en/docs/api_sdk/sdk/lowlevel-api.md +++ b/site/content/en/docs/api_sdk/sdk/lowlevel-api.md @@ -347,19 +347,14 @@ You can find many examples of API client usage in REST API tests [here](https:// ### Organizations -To call an operation in the context of an organization, use one of these method arguments: +To create resource in the context of an organization, use one of these method arguments: - `org` - The unique organization slug - `org_id`- The organization id ```python ... 
-(updated_annotations, response) = api_client.tasks_api.partial_update_annotations( - id=task_id, - org_id=org_id, - action='update', - patched_labeled_data_request=data -) +(task, response) = api_client.tasks_api.create(task_spec, org_id=org_id) ``` ### Paginated responses diff --git a/site/content/en/docs/contributing/development-environment.md b/site/content/en/docs/contributing/development-environment.md index 5a897086fa61..9546fb900290 100644 --- a/site/content/en/docs/contributing/development-environment.md +++ b/site/content/en/docs/contributing/development-environment.md @@ -4,6 +4,7 @@ linkTitle: 'Development environment' weight: 2 description: 'Installing a development environment for different operating systems.' --- + ### Setup the dependencies: - Install necessary dependencies: @@ -11,7 +12,7 @@ description: 'Installing a development environment for different operating syste Ubuntu 18.04 ```bash - sudo apt-get update && sudo apt-get --no-install-recommends install -y build-essential curl git redis-server python3-dev python3-pip python3-venv python3-tk libldap2-dev libsasl2-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev apache2-dev + sudo apt-get update && sudo apt-get --no-install-recommends install -y build-essential curl git redis-server python3-dev python3-pip python3-venv python3-tk libldap2-dev libsasl2-dev pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libswscale-dev libswresample-dev libavfilter-dev ``` ```bash @@ -28,6 +29,7 @@ description: 'Installing a development environment for different operating syste ``` Arch Linux + ```bash # Update the system and AUR (you can use any other AUR helper of choice) first: sudo pacman -Syyu @@ -67,6 +69,7 @@ description: 'Installing a development environment for different operating syste - [Code Spell Checker](https://marketplace.visualstudio.com/items?itemName=streetsidesoftware.code-spell-checker) - Make sure to use Python 3.9.0 or higher + ``` python3 --version ``` @@ -104,6 +107,7 @@ description: 'Installing a development environment for different operating syste > > Homebrew will install FFMpeg 5.0 by default, which does not work, so you should install 4.X. > You can install older 4.X FFMpeg using Homebrew like that: + > > ``` > cd "$(brew --repo homebrew/core)" > git checkout addd616edc9134f057e33694c420f4900be59db8 @@ -111,8 +115,10 @@ description: 'Installing a development environment for different operating syste > HOMEBREW_NO_AUTO_UPDATE=1 brew install ffmpeg > git checkout master > ``` + > > if you are still facing error `Running setup.py install for av ... error`, you may > try more radical variant + > > ``` > cd "$(brew --repo homebrew/core)" > git checkout addd616edc9134f057e33694c420f4900be59db8 @@ -122,21 +128,25 @@ description: 'Installing a development environment for different operating syste > ``` > > If you faced with error `Failed building wheel for h5py`, you may need install `hdf5` + > > ``` > brew install hdf5 > export HDF5_DIR="$(brew --prefix hdf5)" > pip install --no-binary=h5py h5py > ``` + > > If you faced with error > `OSError: Could not find library geos_c or load any of its variants ['libgeos_c.so.1', 'libgeos_c.so']`. > You may fix this using + > > ``` > sudo ln -s /opt/homebrew/lib/libgeos_c.dylib /usr/local/lib > ``` + > > On Mac with Apple Silicon (M1) in order to install TensorFlow you will have > to edit `cvat/requirements/base.txt`. 
> Change `tensorflow` to `tensorflow-macos` - > May need to downgrade version Python to 3.9.* or upgrade version `tensorflow-macos` + > May need to downgrade version Python to 3.9.\* or upgrade version `tensorflow-macos` > Note for Arch Linux users: > @@ -189,11 +199,12 @@ description: 'Installing a development environment for different operating syste > Read this article [Node Sass does not yet support your current environment](https://marketplace.visualstudio.com/items?itemName=msjsdiag.debugger-for-chrome) ### Run CVAT + - Start npm UI debug server (run the following command from CVAT root directory): - If you want to run CVAT in localhost: ```sh yarn run start:cvat-ui - ``` + ``` - If you want to access CVAT from outside of your host: ```sh CVAT_UI_HOST='' yarn run start:cvat-ui @@ -208,10 +219,9 @@ description: 'Installing a development environment for different operating syste - Inside VScode, Open CVAT root dir - Select `server: debug` configuration and run it (F5) to run REST server and its workers -- Make sure that ```Uncaught Exceptions``` option under breakpoints section is unchecked +- Make sure that `Uncaught Exceptions` option under breakpoints section is unchecked - If you choose to run CVAT in localhost: Select `server: chrome` configuration and run it (F5) to open CVAT in Chrome -- Alternative: If you changed CVAT_UI_HOST just enter ```:3000``` in your browser. - +- Alternative: If you changed CVAT_UI_HOST just enter `:3000` in your browser. You have done! Now it is possible to insert breakpoints and debug server and client of the tool. Instructions for running tests locally are available [here](/docs/contributing/running-tests/). @@ -238,11 +248,38 @@ You develop CVAT under WSL (Windows subsystem for Linux) following next steps. ## Note for Mac users - You might have to manually start the redis server. You can do this with `redis-server`. -Alternatively you can also use a redis docker image instead of using the redis-server locally. + Alternatively you can also use a redis docker image instead of using the redis-server locally. ## Note for Arch Linux users + - You need to start `redis` and `docker` services manually in order to begin debugging/running tests: ```bash sudo systemctl start redis.service sudo systemctl start docker.service ``` + +## CVAT Analytics Ports + +In case you cannot access analytics, check if the following ports are open: + +```yml +cvat_vector: + ports: + - '8282:80' + + cvat_clickhouse: + ports: + - '8123:8123' +``` + +In addition, you can completely disable analytics if you don't need it by deleting the following data from +[launch.json](https://github.com/opencv/cvat/blob/develop/.vscode/launch.json): + +```json + "DJANGO_LOG_SERVER_HOST": "localhost", + "DJANGO_LOG_SERVER_PORT": "8282" +``` + +Analytics on GitHub: +[Analytics Components](https://github.com/opencv/cvat/tree/develop/components/analytics) + diff --git a/site/content/en/docs/contributing/running-tests.md b/site/content/en/docs/contributing/running-tests.md index 1c8741f3610b..0363f11e08ee 100644 --- a/site/content/en/docs/contributing/running-tests.md +++ b/site/content/en/docs/contributing/running-tests.md @@ -81,7 +81,9 @@ which should be enough to fix errors arising in REST API tests. 
To debug a server deployed with Docker, you need to do the following: -Rebuild the images and start the test containers: +- Adjust env variables in the `docker-compose.dev.yml` file for your test case + +- Rebuild the images and start the test containers: ```bash CVAT_DEBUG_ENABLED=yes pytest --rebuild --start-services tests/python diff --git a/site/content/en/docs/manual/advanced/analytics.md b/site/content/en/docs/manual/advanced/analytics.md deleted file mode 100644 index 40c7e1f73395..000000000000 --- a/site/content/en/docs/manual/advanced/analytics.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: 'Analytics Monitoring' -linkTitle: 'Analytics Monitoring' -weight: 28 -description: 'Using Analytics to monitor usage statistics.' ---- - -If your CVAT instance was created with analytics support, you can press the `Analytics` button in the dashboard -and analytics and journals will be opened in a new tab. - -![](/images/image113.jpg) - -The analytics allows you to see how much time every user spends on each task -and how much work they did over any time range. - -![](/images/image097.jpg) - -It also has an activity graph which can be modified with a number of users shown and a timeframe. - -![](/images/image096.jpg) diff --git a/site/content/en/docs/manual/advanced/data_on_fly.md b/site/content/en/docs/manual/advanced/data_on_fly.md index d5d3bdda6104..97cc9b050284 100644 --- a/site/content/en/docs/manual/advanced/data_on_fly.md +++ b/site/content/en/docs/manual/advanced/data_on_fly.md @@ -30,6 +30,11 @@ Unfortunately, this method has several drawbacks: - If the data has not been cached yet, and is not reachable during the access time, it cannot be retrieved. +#### How to use + +To enable or disable this feature for a new task, use the [`Use Cache`](/docs/manual/basics/creating_an_annotation_task/#use-cache) +toggle in the task configuration. + #### Uploading a manifest with data When creating a task, you can upload a `manifest.jsonl` file along with the video or dataset with images. diff --git a/site/content/en/docs/manual/advanced/dataset_manifest.md b/site/content/en/docs/manual/advanced/dataset_manifest.md index 57cba6f78b4d..e134dd646896 100644 --- a/site/content/en/docs/manual/advanced/dataset_manifest.md +++ b/site/content/en/docs/manual/advanced/dataset_manifest.md @@ -31,6 +31,20 @@ Manifest files can be used in the following cases: - A video file or a set of images is used as the data source and the caching mode is enabled. [Read more](/docs/manual/advanced/data_on_fly/) - The data is located in a cloud storage. [Read more](/docs/manual/basics/cloud-storages/) +- The `predefined` file sorting method is specified. [Read more](/docs/manual/basics/creating_an_annotation_task/#sorting-method) + +### The predefined sorting method + +Independently of the file source being used, when the `predefined` +sorting method is selected in the task configuration, the source files will be +ordered according to the `.jsonl` manifest file, if it is found in the input list of files. +If a manifest is not found, the order provided in the input file list is used. + +For image archives (e.g. `.zip`), a manifest file (`*.jsonl`) is required when using +the `predefined` file ordering. A manifest file must be provided next to the archive +in the input list of files, it must not be inside the archive. + +If there are multiple manifest files in the input file list, an error will be raised. 
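
As an illustration, here is a minimal sketch of attaching data to an existing task over the REST API with the `predefined` sorting method, listing the manifest next to the archive. It assumes the request body follows the task data schema (`image_quality`, `server_files`, `sorting_method`); the base URL, credentials, task id, and file paths are placeholders.

```python
import requests

BASE_URL = "https://app.cvat.ai"  # replace with your CVAT instance URL
AUTH = ("user", "pass")           # basic auth credentials
TASK_ID = 42                      # an existing task that has no data attached yet

# The manifest is listed next to the archive in the input file list,
# not packed inside the archive.
data_request = {
    "image_quality": 70,
    "sorting_method": "predefined",
    "server_files": [
        "datasets/images.zip",
        "datasets/manifest.jsonl",
    ],
}

response = requests.post(f"{BASE_URL}/api/tasks/{TASK_ID}/data", json=data_request, auth=AUTH)
response.raise_for_status()
```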
## How to generate manifest files diff --git a/site/content/en/docs/manual/advanced/formats/format-camvid.md b/site/content/en/docs/manual/advanced/formats/format-camvid.md index 1f5c81c2a3f4..1485188a5bc6 100644 --- a/site/content/en/docs/manual/advanced/formats/format-camvid.md +++ b/site/content/en/docs/manual/advanced/formats/format-camvid.md @@ -5,6 +5,8 @@ weight: 10 # [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/camvid_dataset) + ## CamVid export Downloaded file: a zip archive of the following structure: diff --git a/site/content/en/docs/manual/advanced/formats/format-cityscapes.md b/site/content/en/docs/manual/advanced/formats/format-cityscapes.md index 462c18d72d72..28f51801e694 100644 --- a/site/content/en/docs/manual/advanced/formats/format-cityscapes.md +++ b/site/content/en/docs/manual/advanced/formats/format-cityscapes.md @@ -6,6 +6,7 @@ weight: 16 # [Cityscapes](https://www.cityscapes-dataset.com/login/) - [Format specification](https://github.com/mcordts/cityscapesScripts#the-cityscapes-dataset) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/cityscapes_dataset) - Supported annotations diff --git a/site/content/en/docs/manual/advanced/formats/format-coco.md b/site/content/en/docs/manual/advanced/formats/format-coco.md index f7639bd0bb5e..fc37f8b778d4 100644 --- a/site/content/en/docs/manual/advanced/formats/format-coco.md +++ b/site/content/en/docs/manual/advanced/formats/format-coco.md @@ -6,10 +6,33 @@ weight: 5 # [MS COCO Object Detection](http://cocodataset.org/#format-data) - [Format specification](https://openvinotoolkit.github.io/datumaro/docs/formats/coco/) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/coco_dataset) ## COCO export -Downloaded file: a zip archive with the structure described [here](https://openvinotoolkit.github.io/datumaro/docs/formats/coco/#import-coco-dataset) +Downloaded file: a zip archive with the structure described [here](https://openvinotoolkit.github.io/datumaro/latest/docs/data-formats/formats/coco.html#import-coco-dataset) + +``` +archive.zip/ +├── images/ +│ ├── train/ +│ │ ├── +│ │ ├── +│ │ └── ... +│ └── val/ +│ ├── +│ ├── +│ └── ... +└── annotations/ + ├── _.json + └── ... +``` + +If the dataset is exported from a Project, the subsets are named the same way as they are named +in the project. In other cases there will be a single `default` subset, containing all the data. +The `` part corresponds to one of the COCO tasks: `instances`, `person_keypoints`, +`panoptic`, `image_info`, `labels`, `captions`, `stuff`. There can be several annotation +files in the archive. - supported annotations: Polygons, Rectangles - supported attributes: @@ -21,7 +44,7 @@ Downloaded file: a zip archive with the structure described [here](https://openv - `score` (number) - the annotation `score` field - arbitrary attributes - will be stored in the `attributes` annotation section -Support for COCO tasks via Datumaro is described [here](https://openvinotoolkit.github.io/datumaro/docs/formats/coco/#export-to-other-formats) +Support for COCO tasks via Datumaro is described [here](https://openvinotoolkit.github.io/datumaro/latest/docs/data-formats/formats/coco.html#export-to-other-formats) For example, [support for COCO keypoints over Datumaro](https://github.com/openvinotoolkit/cvat/issues/2910#issuecomment-726077582): 1. 
Install [Datumaro](https://github.com/openvinotoolkit/datumaro) @@ -35,19 +58,20 @@ keypoint lists (without the `visibility` COCO flag). ## COCO import -Uploaded file: a single unpacked `*.json` or a zip archive with the structure described -[here](https://openvinotoolkit.github.io/datumaro/docs/formats/coco/#import-coco-dataset) +Uploaded file: a single unpacked `*.json` or a zip archive with the structure described above or +[here](https://openvinotoolkit.github.io/datumaro/latest/docs/data-formats/formats/coco.html#import-coco-dataset) (without images). - supported annotations: Polygons, Rectangles (if the `segmentation` field is empty) +- supported tasks: `instances`, `person_keypoints` (only segmentations will be imported), `panoptic` # [MS COCO Keypoint Detection](https://cocodataset.org/#keypoints-2020) -- [Format specification](https://openvinotoolkit.github.io/datumaro/docs/formats/coco/) +- [Format specification](https://openvinotoolkit.github.io/datumaro/latest/docs/data-formats/formats/coco.html) ## COCO export -Downloaded file: a zip archive with the structure described [here](https://openvinotoolkit.github.io/datumaro/docs/formats/coco/#import-coco-dataset) +Downloaded file: a zip archive with the structure described [here](https://openvinotoolkit.github.io/datumaro/latest/docs/data-formats/formats/coco.html#import-coco-dataset) - supported annotations: Skeletons - supported attributes: @@ -62,14 +86,14 @@ Downloaded file: a zip archive with the structure described [here](https://openv ## COCO import Uploaded file: a single unpacked `*.json` or a zip archive with the structure described -[here](https://openvinotoolkit.github.io/datumaro/docs/formats/coco/#import-coco-dataset) +[here](https://openvinotoolkit.github.io/datumaro/latest/docs/data-formats/formats/coco.html#import-coco-dataset) (without images). - supported annotations: Skeletons ## How to create a task from MS COCO dataset -1. Download the [MS COCO dataset](https://openvinotoolkit.github.io/datumaro/docs/formats/coco/#import-coco-dataset). +1. Download the [MS COCO dataset](https://openvinotoolkit.github.io/datumaro/latest/docs/data-formats/formats/coco.html#import-coco-dataset). For example `val images` and `instances` annotations diff --git a/site/content/en/docs/manual/advanced/formats/format-cvat.md b/site/content/en/docs/manual/advanced/formats/format-cvat.md index f8c282865125..e91e89b3b2d3 100644 --- a/site/content/en/docs/manual/advanced/formats/format-cvat.md +++ b/site/content/en/docs/manual/advanced/formats/format-cvat.md @@ -5,6 +5,9 @@ weight: 1 # CVAT +- [Format specification](/docs/manual/advanced/xml_format/) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/cvat_dataset) + This is the native CVAT annotation format. It supports all CVAT annotations features, so it can be used to make data backups. @@ -16,8 +19,6 @@ features, so it can be used to make data backups. 
- attributes are supported -- [Format specification](/docs/manual/advanced/xml_format/) - ## CVAT for images export Downloaded file: a ZIP file of the following structure: diff --git a/site/content/en/docs/manual/advanced/formats/format-icdar.md b/site/content/en/docs/manual/advanced/formats/format-icdar.md index 43d187d79d42..c558ba76805b 100644 --- a/site/content/en/docs/manual/advanced/formats/format-icdar.md +++ b/site/content/en/docs/manual/advanced/formats/format-icdar.md @@ -5,6 +5,8 @@ weight: 14 # [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/icdar_dataset) + ## ICDAR13/15 export Downloaded file: a zip archive of the following structure: diff --git a/site/content/en/docs/manual/advanced/formats/format-imagenet.md b/site/content/en/docs/manual/advanced/formats/format-imagenet.md index 4f6ac12b1a22..85d7533d8f00 100644 --- a/site/content/en/docs/manual/advanced/formats/format-imagenet.md +++ b/site/content/en/docs/manual/advanced/formats/format-imagenet.md @@ -5,6 +5,8 @@ weight: 9 # [ImageNet](http://www.image-net.org) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/imagenet_dataset) + ## ImageNet export Downloaded file: a zip archive of the following structure: diff --git a/site/content/en/docs/manual/advanced/formats/format-kitti.md b/site/content/en/docs/manual/advanced/formats/format-kitti.md index b90f91ae94a8..06ace2998232 100644 --- a/site/content/en/docs/manual/advanced/formats/format-kitti.md +++ b/site/content/en/docs/manual/advanced/formats/format-kitti.md @@ -7,6 +7,7 @@ weight: 17 - [Format specification for KITTI detection](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_object.zip) - [Format specification for KITTI segmentation](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_semantics.zip) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/kitti_dataset) - supported annotations: diff --git a/site/content/en/docs/manual/advanced/formats/format-labelme.md b/site/content/en/docs/manual/advanced/formats/format-labelme.md index b4262f255122..42a0a699e718 100644 --- a/site/content/en/docs/manual/advanced/formats/format-labelme.md +++ b/site/content/en/docs/manual/advanced/formats/format-labelme.md @@ -5,6 +5,8 @@ weight: 2 # [LabelMe](http://labelme.csail.mit.edu/Release3.0) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/labelme_dataset) + ## LabelMe export Downloaded file: a zip archive of the following structure: diff --git a/site/content/en/docs/manual/advanced/formats/format-lfw.md b/site/content/en/docs/manual/advanced/formats/format-lfw.md index fc2f75bb2eef..9e2b44fd0243 100644 --- a/site/content/en/docs/manual/advanced/formats/format-lfw.md +++ b/site/content/en/docs/manual/advanced/formats/format-lfw.md @@ -5,7 +5,8 @@ weight: 17 # [LFW](http://vis-www.cs.umass.edu/lfw/) -- Format specification available [here](http://vis-www.cs.umass.edu/lfw/README.txt) +- [Format specification](http://vis-www.cs.umass.edu/lfw/README.txt) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/lfw_dataset) - Supported annotations: tags, points. 
diff --git a/site/content/en/docs/manual/advanced/formats/format-market1501.md b/site/content/en/docs/manual/advanced/formats/format-market1501.md index 42a03d670bde..241f248b663a 100644 --- a/site/content/en/docs/manual/advanced/formats/format-market1501.md +++ b/site/content/en/docs/manual/advanced/formats/format-market1501.md @@ -5,6 +5,8 @@ weight: 13 # [Market-1501](https://www.aitribune.com/dataset/2018051063) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/market1501_dataset) + ## Market-1501 export Downloaded file: a zip archive of the following structure: diff --git a/site/content/en/docs/manual/advanced/formats/format-mot.md b/site/content/en/docs/manual/advanced/formats/format-mot.md index 1e6ce4175333..58524375b63d 100644 --- a/site/content/en/docs/manual/advanced/formats/format-mot.md +++ b/site/content/en/docs/manual/advanced/formats/format-mot.md @@ -5,6 +5,8 @@ weight: 3 # [MOT sequence](https://arxiv.org/pdf/1906.04567.pdf) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/mot_dataset) + ## MOT export Downloaded file: a zip archive of the following structure: diff --git a/site/content/en/docs/manual/advanced/formats/format-mots.md b/site/content/en/docs/manual/advanced/formats/format-mots.md index df0a69969d8d..9a4952bdfbb3 100644 --- a/site/content/en/docs/manual/advanced/formats/format-mots.md +++ b/site/content/en/docs/manual/advanced/formats/format-mots.md @@ -5,6 +5,8 @@ weight: 4 # [MOTS PNG](https://www.vision.rwth-aachen.de/page/mots) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/mots_dataset) + ## MOTS PNG export Downloaded file: a zip archive of the following structure: diff --git a/site/content/en/docs/manual/advanced/formats/format-openimages.md b/site/content/en/docs/manual/advanced/formats/format-openimages.md index dc238b472f9a..b80494dca122 100644 --- a/site/content/en/docs/manual/advanced/formats/format-openimages.md +++ b/site/content/en/docs/manual/advanced/formats/format-openimages.md @@ -6,6 +6,7 @@ weight: 15 # [Open Images](https://storage.googleapis.com/openimages/web/index.html) - [Format specification](https://storage.googleapis.com/openimages/web/download.html) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/open_images_dataset) - Supported annotations: diff --git a/site/content/en/docs/manual/advanced/formats/format-tfrecord.md b/site/content/en/docs/manual/advanced/formats/format-tfrecord.md index 7cbbbf6bcf53..b01950ab992f 100644 --- a/site/content/en/docs/manual/advanced/formats/format-tfrecord.md +++ b/site/content/en/docs/manual/advanced/formats/format-tfrecord.md @@ -5,6 +5,8 @@ weight: 8 # [TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/tf_detection_api_dataset) + TFRecord is a very flexible format, but we try to correspond the format that used in [TF object detection](https://github.com/tensorflow/models/tree/master/research/object_detection) diff --git a/site/content/en/docs/manual/advanced/formats/format-vggface2.md b/site/content/en/docs/manual/advanced/formats/format-vggface2.md index d9c8665dc5c7..da32c6872715 100644 --- a/site/content/en/docs/manual/advanced/formats/format-vggface2.md +++ b/site/content/en/docs/manual/advanced/formats/format-vggface2.md @@ -5,6 +5,8 @@ weight: 12 # [VGGFace2](https://github.com/ox-vgg/vgg_face2) +- [Dataset 
examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/vgg_face2_dataset) + ## VGGFace2 export Downloaded file: a zip archive of the following structure: diff --git a/site/content/en/docs/manual/advanced/formats/format-voc.md b/site/content/en/docs/manual/advanced/formats/format-voc.md index f735d3900b3e..9703b9d2c8b3 100644 --- a/site/content/en/docs/manual/advanced/formats/format-voc.md +++ b/site/content/en/docs/manual/advanced/formats/format-voc.md @@ -6,12 +6,13 @@ weight: 6 # [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/voc_dataset) - supported annotations: - - Rectangles (detection and layout tasks) - - Tags (action- and classification tasks) - - Polygons (segmentation task) + - Rectangles (`detection` and `layout` tasks) + - Tags (`action-` and `classification` tasks) + - Polygons (`segmentation` task) - supported attributes: diff --git a/site/content/en/docs/manual/advanced/formats/format-widerface.md b/site/content/en/docs/manual/advanced/formats/format-widerface.md index e5c5dd33d34d..36be8290d489 100644 --- a/site/content/en/docs/manual/advanced/formats/format-widerface.md +++ b/site/content/en/docs/manual/advanced/formats/format-widerface.md @@ -5,6 +5,8 @@ weight: 9 # [WIDER Face](http://shuoyang1213.me/WIDERFACE/) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/widerface_dataset) + ## WIDER Face export Downloaded file: a zip archive of the following structure: diff --git a/site/content/en/docs/manual/advanced/formats/format-yolo.md b/site/content/en/docs/manual/advanced/formats/format-yolo.md index edc773f3ad2b..9fdcef9a8e9c 100644 --- a/site/content/en/docs/manual/advanced/formats/format-yolo.md +++ b/site/content/en/docs/manual/advanced/formats/format-yolo.md @@ -6,6 +6,7 @@ weight: 7 # [YOLO](https://pjreddie.com/darknet/yolo/) - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects) +- [Dataset examples](https://github.com/cvat-ai/datumaro/tree/v0.3/tests/assets/yolo_dataset) - supported annotations: Rectangles ## YOLO export diff --git a/site/content/en/images/analytic_architecture.jpg b/site/content/en/images/analytic_architecture.jpg new file mode 100644 index 000000000000..64a4e5f3b1d0 Binary files /dev/null and b/site/content/en/images/analytic_architecture.jpg differ diff --git a/site/content/en/images/analytics_menu.jpg b/site/content/en/images/analytics_menu.jpg new file mode 100644 index 000000000000..47c55f3d3d10 Binary files /dev/null and b/site/content/en/images/analytics_menu.jpg differ diff --git a/site/content/en/images/apply.jpg b/site/content/en/images/apply.jpg new file mode 100644 index 000000000000..1735276df17f Binary files /dev/null and b/site/content/en/images/apply.jpg differ diff --git a/site/content/en/images/dashboard_00.jpg b/site/content/en/images/dashboard_00.jpg new file mode 100644 index 000000000000..fb345e9be2a3 Binary files /dev/null and b/site/content/en/images/dashboard_00.jpg differ diff --git a/site/content/en/images/dashboard_01.jpg b/site/content/en/images/dashboard_01.jpg new file mode 100644 index 000000000000..6f1c621c7b64 Binary files /dev/null and b/site/content/en/images/dashboard_01.jpg differ diff --git a/site/content/en/images/dashboard_02.jpg b/site/content/en/images/dashboard_02.jpg new file mode 100644 index 000000000000..d3a5f72a87e5 Binary files 
/dev/null and b/site/content/en/images/dashboard_02.jpg differ diff --git a/site/content/en/images/dashboard_03.jpg b/site/content/en/images/dashboard_03.jpg new file mode 100644 index 000000000000..c6deea297fa8 Binary files /dev/null and b/site/content/en/images/dashboard_03.jpg differ diff --git a/site/content/en/images/dashboard_04.jpg b/site/content/en/images/dashboard_04.jpg new file mode 100644 index 000000000000..9842f16b865c Binary files /dev/null and b/site/content/en/images/dashboard_04.jpg differ diff --git a/site/content/en/images/image096.jpg b/site/content/en/images/image096.jpg deleted file mode 100644 index ea0550636972..000000000000 Binary files a/site/content/en/images/image096.jpg and /dev/null differ diff --git a/site/content/en/images/image097.jpg b/site/content/en/images/image097.jpg deleted file mode 100644 index 83ec894c595f..000000000000 Binary files a/site/content/en/images/image097.jpg and /dev/null differ diff --git a/site/content/en/images/image113.jpg b/site/content/en/images/image113.jpg deleted file mode 100644 index f12fe1762e06..000000000000 Binary files a/site/content/en/images/image113.jpg and /dev/null differ diff --git a/site/content/en/images/ping_webhook.jpg b/site/content/en/images/ping_webhook.jpg new file mode 100644 index 000000000000..16fd0b48a459 Binary files /dev/null and b/site/content/en/images/ping_webhook.jpg differ diff --git a/site/content/en/images/save_json.jpg b/site/content/en/images/save_json.jpg new file mode 100644 index 000000000000..a297cc15558a Binary files /dev/null and b/site/content/en/images/save_json.jpg differ diff --git a/site/content/en/images/save_results.jpg b/site/content/en/images/save_results.jpg new file mode 100644 index 000000000000..3ec47f3941a3 Binary files /dev/null and b/site/content/en/images/save_results.jpg differ diff --git a/site/content/en/images/supported_events.jpg b/site/content/en/images/supported_events.jpg new file mode 100644 index 000000000000..eedafa7776e8 Binary files /dev/null and b/site/content/en/images/supported_events.jpg differ diff --git a/site/content/en/images/webhook_form_project_org.jpg b/site/content/en/images/webhook_form_project_org.jpg new file mode 100644 index 000000000000..23158279eef0 Binary files /dev/null and b/site/content/en/images/webhook_form_project_org.jpg differ diff --git a/supervisord/server.conf b/supervisord/server.conf index 402a0870bf8a..c842db3e4178 100644 --- a/supervisord/server.conf +++ b/supervisord/server.conf @@ -18,30 +18,29 @@ pidfile=/tmp/supervisord/supervisord.pid ; pidfile location childlogdir=%(ENV_HOME)s/logs/ ; where child log files will live [program:clamav_update] +startsecs=0 command=bash -c "if [ \"${CLAM_AV}\" = 'yes' ]; then /usr/bin/freshclam -d \ -l %(ENV_HOME)s/logs/freshclam.log --foreground=true; fi" - [program:ssh-agent] command=bash -c "rm /tmp/ssh-agent.sock -f && /usr/bin/ssh-agent -d -a /tmp/ssh-agent.sock" priority=1 autorestart=true - -[program:runserver] -; Here need to run a couple of commands to initialize DB and copy static files. -; We cannot initialize DB on build because the DB should be online. Also some -; apps are dynamically loaded by an environment variable. It can lead to issues -; with docker cache. Thus it is necessary to run collectstatic here for such -; apps. 
-command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_POSTGRES_HOST)s:5432 -t 0 -- bash -ic \ - "rm -f /tmp/cvat-server/httpd.pid && python3 ~/manage.py migrate && \ - python3 ~/manage.py collectstatic --no-input && \ - exec python3 $HOME/manage.py runmodwsgi --log-to-terminal --port 8080 \ - --limit-request-body 1073741824 --log-level INFO --include-file ~/mod_wsgi.conf \ - %(ENV_DJANGO_MODWSGI_EXTRA_ARGS)s --locale %(ENV_LC_ALL)s \ - --server-root /tmp/cvat-server" - - +[program:nginx] +command=/usr/sbin/nginx -g "daemon off;" +autostart=true +autorestart=true +startretries=5 +numprocs=1 +process_name=%(program_name)s-%(process_num)s + +[fcgi-program:uvicorn] +socket=tcp://localhost:8000 +command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_POSTGRES_HOST)s:5432 -t 0 -- python3 -m uvicorn + --fd 0 --forwarded-allow-ips='*' cvat.asgi:application environment=SSH_AUTH_SOCK="/tmp/ssh-agent.sock" numprocs=%(ENV_NUMPROCS)s +process_name=%(program_name)s-%(process_num)s +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 diff --git a/supervisord/utils.conf b/supervisord/utils.conf index edea3746c254..925ada37324f 100644 --- a/supervisord/utils.conf +++ b/supervisord/utils.conf @@ -22,7 +22,7 @@ command=bash -c "rm /tmp/ssh-agent.sock -f && /usr/bin/ssh-agent -d -a /tmp/ssh- priority=1 autorestart=true -[program:git_status_updater] +[program:git-status-updater] command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic \ "python3 ~/manage.py update_git_states" environment=SSH_AUTH_SOCK="/tmp/ssh-agent.sock" @@ -34,10 +34,19 @@ command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -i environment=SSH_AUTH_SOCK="/tmp/ssh-agent.sock",VECTOR_EVENT_HANDLER="SynchronousLogstashHandler" numprocs=1 -[program:rqworker_notifications] +[program:rqworker-notifications] command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \ exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 notifications \ --worker-class cvat.rqworker.DefaultWorker \ " environment=VECTOR_EVENT_HANDLER="SynchronousLogstashHandler" numprocs=1 + +[program:rqworker_cleaning] +command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \ + exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 cleaning \ + --worker-class cvat.rqworker.DefaultWorker \ + " +environment=SSH_AUTH_SOCK="/tmp/ssh-agent.sock",VECTOR_EVENT_HANDLER="SynchronousLogstashHandler" +numprocs=%(ENV_NUMPROCS)s +process_name=rqworker_cleaning_%(process_num)s \ No newline at end of file diff --git a/supervisord/worker.annotation.conf b/supervisord/worker.annotation.conf index c6fa5103a043..c4615370a606 100644 --- a/supervisord/worker.annotation.conf +++ b/supervisord/worker.annotation.conf @@ -22,7 +22,7 @@ command=bash -c "rm /tmp/ssh-agent.sock -f && /usr/bin/ssh-agent -d -a /tmp/ssh- priority=1 autorestart=true -[program:rqworker_annotation] +[program:rqworker-annotation] command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \ exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 annotation \ --worker-class cvat.rqworker.DefaultWorker \ diff --git a/supervisord/worker.export.conf b/supervisord/worker.export.conf index 72e136428773..87cd7dc1cd04 100644 --- a/supervisord/worker.export.conf +++ b/supervisord/worker.export.conf @@ -22,11 +22,11 @@ command=bash -c "rm /tmp/ssh-agent.sock -f && /usr/bin/ssh-agent -d -a /tmp/ssh- priority=1 autorestart=true -[program:rqworker_export] +[program:rqworker-export] command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 
-t 0 -- bash -ic " \ exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 export \ --worker-class cvat.rqworker.DefaultWorker \ " environment=SSH_AUTH_SOCK="/tmp/ssh-agent.sock",VECTOR_EVENT_HANDLER="SynchronousLogstashHandler" numprocs=%(ENV_NUMPROCS)s -process_name=rqworker_export_%(process_num)s +process_name=%(program_name)s-%(process_num)s diff --git a/supervisord/worker.import.conf b/supervisord/worker.import.conf index 975a9251f353..30b83ca6046d 100644 --- a/supervisord/worker.import.conf +++ b/supervisord/worker.import.conf @@ -22,17 +22,18 @@ command=bash -c "rm /tmp/ssh-agent.sock -f && /usr/bin/ssh-agent -d -a /tmp/ssh- priority=1 autorestart=true -[program:rqworker_import] +[program:rqworker-import] command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \ exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 import \ --worker-class cvat.rqworker.DefaultWorker \ " environment=SSH_AUTH_SOCK="/tmp/ssh-agent.sock",VECTOR_EVENT_HANDLER="SynchronousLogstashHandler" numprocs=%(ENV_NUMPROCS)s -process_name=rqworker_import_%(process_num)s +process_name=%(program_name)s-%(process_num)s -[program:clamav_update] +[program:clamav-update] command=bash -c "if [ \"${CLAM_AV}\" = 'yes' ]; then /usr/bin/freshclam -d \ -l %(ENV_HOME)s/logs/freshclam.log --foreground=true; fi" numprocs=1 +startsecs=0 diff --git a/tests/python/cli/test_cli.py b/tests/python/cli/test_cli.py index 50b0d3d43eaf..8b0e5bf01114 100644 --- a/tests/python/cli/test_cli.py +++ b/tests/python/cli/test_cli.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT @@ -116,6 +116,39 @@ def test_can_create_task_from_local_images(self): task_id = int(stdout.split()[-1]) assert self.client.tasks.retrieve(task_id).size == 5 + def test_can_create_task_from_local_images_with_parameters(self): + # Checks for regressions of + + files = generate_images(self.tmp_path, 7) + files.sort(reverse=True) + frame_step = 3 + + stdout = self.run_cli( + "create", + "test_task", + ResourceType.LOCAL.name, + *map(os.fspath, files), + "--labels", + json.dumps([{"name": "car"}, {"name": "person"}]), + "--completion_verification_period", + "0.01", + "--sorting-method", + "predefined", + "--frame_step", + str(frame_step), + "--bug_tracker", + "http://localhost/bug", + ) + + task_id = int(stdout.split()[-1]) + task = self.client.tasks.retrieve(task_id) + frames = task.get_frames_info() + assert [f.name for f in frames] == [ + f.name for i, f in enumerate(files) if i % frame_step == 0 + ] + assert task.get_meta().frame_filter == f"step={frame_step}" + assert task.bug_tracker == "http://localhost/bug" + def test_can_list_tasks_in_simple_format(self, fxt_new_task: Task): output = self.run_cli("ls") diff --git a/tests/python/pytest.ini b/tests/python/pytest.ini index 653ae999256e..05cda52273da 100644 --- a/tests/python/pytest.ini +++ b/tests/python/pytest.ini @@ -8,4 +8,3 @@ timeout = 15 markers = with_external_services: The test requires services extrernal to the default CVAT deployment, e.g. a Git server etc. 
- diff --git a/tests/python/rest_api/test_analytics.py b/tests/python/rest_api/test_analytics.py index 6dcfae58ef64..62a4bb8e144b 100644 --- a/tests/python/rest_api/test_analytics.py +++ b/tests/python/rest_api/test_analytics.py @@ -1,5 +1,5 @@ # Copyright (C) 2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT @@ -18,7 +18,7 @@ from shared.utils.config import delete_method, make_api_client, server_get from shared.utils.helpers import generate_image_files -from .utils import _test_create_task +from .utils import create_task class TestGetAnalytics: @@ -86,35 +86,55 @@ def setup(self, restore_clickhouse_db_per_function): "segment_size": 2, "project_id": self.project_id, } - task_data = { - "image_quality": 10, - "client_files": generate_image_files(3), - } task_ids = [ - _test_create_task( - self._USERNAME, task_spec, task_data, content_type="multipart/form-data" + create_task( + self._USERNAME, + task_spec, + { + "image_quality": 10, + "client_files": generate_image_files(3), + }, ), - _test_create_task( - self._USERNAME, task_spec, task_data, content_type="multipart/form-data" + create_task( + self._USERNAME, + task_spec, + { + "image_quality": 10, + "client_files": generate_image_files(3), + }, ), ] self.task_ids = [t[0] for t in task_ids] - expected_request_ids = [project_request_id, *[t[1] for t in task_ids]] + assert project_request_id is not None + assert all(t[1] is not None for t in task_ids) - assert all(req_id is not None for req_id in expected_request_ids) - - self._wait_for_request_ids(expected_request_ids) + event_filters = [ + ( + (lambda e: json.loads(e["payload"])["request"]["id"], [project_request_id]), + ("scope", ["create:project"]), + ), + ] + for task_id in task_ids: + event_filters.extend( + ( + ( + (lambda e: json.loads(e["payload"])["request"]["id"], [task_id[1]]), + ("scope", ["create:task"]), + ), + (("scope", ["create:job"]),), + ) + ) + self._wait_for_request_ids(event_filters) - def _wait_for_request_ids(self, expected_request_ids): + def _wait_for_request_ids(self, event_filters): MAX_RETRIES = 5 SLEEP_INTERVAL = 2 while MAX_RETRIES > 0: data = self._test_get_audit_logs_as_csv() events = self._csv_to_dict(data) - request_ids = set(json.loads(e["payload"])["request"]["id"] for e in events) - if all(req_id in request_ids for req_id in expected_request_ids): + if all(self._filter_events(events, filter) for filter in event_filters): break MAX_RETRIES -= 1 sleep(SLEEP_INTERVAL) @@ -155,13 +175,12 @@ def _csv_to_dict(csv_data): return res @staticmethod - def _filter_events(events, filter_): + def _filter_events(events, filters): res = [] - for event in events: - if all( - (event[filter_key] == filter_value for filter_key, filter_value in filter_.items()) - ): - res.append(event) + get_value = lambda getter, e: getter(e) if callable(getter) else e.get(getter, None) + for e in events: + if all(get_value(getter, e) in expected_values for getter, expected_values in filters): + res.append(e) return res @@ -195,7 +214,7 @@ def test_filter_by_project(self): data = self._test_get_audit_logs_as_csv(**query_params) events = self._csv_to_dict(data) - filtered_events = self._filter_events(events, {"project_id": str(self.project_id)}) + filtered_events = self._filter_events(events, [("project_id", [str(self.project_id)])]) assert len(filtered_events) assert len(events) == len(filtered_events) @@ -213,7 +232,7 @@ def test_filter_by_task(self): data = 
self._test_get_audit_logs_as_csv(**query_params) events = self._csv_to_dict(data) - filtered_events = self._filter_events(events, {"task_id": str(task_id)}) + filtered_events = self._filter_events(events, [("task_id", [str(task_id)])]) assert len(filtered_events) assert len(events) == len(filtered_events) @@ -251,7 +270,24 @@ def test_delete_project(self): response = delete_method("admin1", f"projects/{self.project_id}") assert response.status_code == HTTPStatus.NO_CONTENT - self._wait_for_request_ids([response.headers.get("X-Request-Id")]) + event_filters = ( + ( + ( + lambda e: json.loads(e["payload"])["request"]["id"], + [response.headers.get("X-Request-Id")], + ), + ("scope", ["delete:project"]), + ), + ( + ( + lambda e: json.loads(e["payload"])["request"]["id"], + [response.headers.get("X-Request-Id")], + ), + ("scope", ["delete:task"]), + ), + ) + + self._wait_for_request_ids(event_filters) query_params = { "project_id": self.project_id, @@ -260,7 +296,7 @@ def test_delete_project(self): data = self._test_get_audit_logs_as_csv(**query_params) events = self._csv_to_dict(data) - filtered_events = self._filter_events(events, {"project_id": str(self.project_id)}) + filtered_events = self._filter_events(events, [("project_id", [str(self.project_id)])]) assert len(filtered_events) assert len(events) == len(filtered_events) diff --git a/tests/python/rest_api/test_cloud_storages.py b/tests/python/rest_api/test_cloud_storages.py index da308e4410b5..02f0030984d1 100644 --- a/tests/python/rest_api/test_cloud_storages.py +++ b/tests/python/rest_api/test_cloud_storages.py @@ -16,7 +16,7 @@ from deepdiff import DeepDiff from PIL import Image -from shared.utils.config import make_api_client +from shared.utils.config import get_method, make_api_client from .utils import CollectionSimpleFilterTestBase @@ -575,3 +575,22 @@ def test_iterate_over_cloud_storage_content( break assert expected_content == current_content + + +@pytest.mark.usefixtures("restore_db_per_class") +class TestListCloudStorages: + def _test_can_see_cloud_storages(self, user, data, **kwargs): + response = get_method(user, "cloudstorages", **kwargs) + + assert response.status_code == HTTPStatus.OK + assert DeepDiff(data, response.json()["results"]) == {} + + def test_admin_can_see_all_cloud_storages(self, cloud_storages): + self._test_can_see_cloud_storages("admin2", cloud_storages.raw, page_size="all") + + @pytest.mark.parametrize("field_value, query_value", [(2, 2), (None, "")]) + def test_can_filter_by_org_id(self, field_value, query_value, cloud_storages): + cloud_storages = filter(lambda i: i["organization"] == field_value, cloud_storages) + self._test_can_see_cloud_storages( + "admin2", list(cloud_storages), page_size="all", org_id=query_value + ) diff --git a/tests/python/rest_api/test_invitations.py b/tests/python/rest_api/test_invitations.py index b9bc4f91535e..2251b14a3f10 100644 --- a/tests/python/rest_api/test_invitations.py +++ b/tests/python/rest_api/test_invitations.py @@ -7,8 +7,9 @@ import pytest from cvat_sdk.api_client.api_client import ApiClient, Endpoint +from deepdiff import DeepDiff -from shared.utils.config import post_method +from shared.utils.config import get_method, post_method from .utils import CollectionSimpleFilterTestBase @@ -120,3 +121,22 @@ def _get_endpoint(self, api_client: ApiClient) -> Endpoint: ) def test_can_use_simple_filter_for_object_list(self, field): return super().test_can_use_simple_filter_for_object_list(field) + + +@pytest.mark.usefixtures("restore_db_per_class") +class 
TestListInvitations: + def _test_can_see_invitations(self, user, data, **kwargs): + response = get_method(user, "invitations", **kwargs) + + assert response.status_code == HTTPStatus.OK + assert DeepDiff(data, response.json()["results"]) == {} + + def test_admin_can_see_all_invitations(self, invitations): + self._test_can_see_invitations("admin2", invitations.raw, page_size="all") + + @pytest.mark.parametrize("field_value, query_value", [(1, 1), (None, "")]) + def test_can_filter_by_org_id(self, field_value, query_value, invitations): + invitations = filter(lambda i: i["organization"] == field_value, invitations) + self._test_can_see_invitations( + "admin2", list(invitations), page_size="all", org_id=query_value + ) diff --git a/tests/python/rest_api/test_issues.py b/tests/python/rest_api/test_issues.py index dd80869cb59d..74f09c1a8d53 100644 --- a/tests/python/rest_api/test_issues.py +++ b/tests/python/rest_api/test_issues.py @@ -13,7 +13,7 @@ from cvat_sdk.api_client.api_client import ApiClient, Endpoint from deepdiff import DeepDiff -from shared.utils.config import make_api_client +from shared.utils.config import get_method, make_api_client from .utils import CollectionSimpleFilterTestBase @@ -394,3 +394,20 @@ def _get_field_samples(self, field: str) -> Tuple[Any, List[Dict[str, Any]]]: ) def test_can_use_simple_filter_for_object_list(self, field): return super().test_can_use_simple_filter_for_object_list(field) + + +@pytest.mark.usefixtures("restore_db_per_class") +class TestListIssues: + def _test_can_see_issues(self, user, data, **kwargs): + response = get_method(user, "issues", **kwargs) + + assert response.status_code == HTTPStatus.OK + assert DeepDiff(data, response.json()["results"]) == {} + + def test_admin_can_see_all_issues(self, issues): + self._test_can_see_issues("admin2", issues.raw, page_size="all") + + @pytest.mark.parametrize("field_value, query_value", [(1, 1), (None, "")]) + def test_can_filter_by_org_id(self, field_value, query_value, issues, jobs): + issues = filter(lambda i: jobs[i["job"]]["organization"] == field_value, issues) + self._test_can_see_issues("admin2", list(issues), page_size="all", org_id=query_value) diff --git a/tests/python/rest_api/test_memberships.py b/tests/python/rest_api/test_memberships.py index 31f4b3447722..906fe99dceda 100644 --- a/tests/python/rest_api/test_memberships.py +++ b/tests/python/rest_api/test_memberships.py @@ -30,6 +30,13 @@ def _test_cannot_see_memberships(self, user, **kwargs): def test_admin_can_see_all_memberships(self, memberships): self._test_can_see_memberships("admin2", memberships.raw, page_size="all") + @pytest.mark.parametrize("field_value, query_value", [(1, 1), (None, "")]) + def test_can_filter_by_org_id(self, field_value, query_value, memberships): + memberships = filter(lambda m: m["organization"] == field_value, memberships) + self._test_can_see_memberships( + "admin2", list(memberships), page_size="all", org_id=query_value + ) + def test_non_admin_can_see_only_self_memberships(self, memberships): non_admins = ["business1", "user1", "dummy1", "worker2"] for username in non_admins: diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index 0e296122d9d7..40def2386124 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -213,9 +213,7 @@ def test_org_worker_cannot_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_cannot_get_project_backup( - user["username"], project["id"], 
org_id=project["organization"] - ) + self._test_cannot_get_project_backup(user["username"], project["id"]) # Org worker that in [project:owner, project:assignee] can get project backup. def test_org_worker_can_get_project_backup( @@ -231,9 +229,7 @@ def test_org_worker_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup( - user["username"], project["id"], org_id=project["organization"] - ) + self._test_can_get_project_backup(user["username"], project["id"]) # Org supervisor that in [project:owner, project:assignee] can get project backup. def test_org_supervisor_can_get_project_backup( @@ -249,9 +245,7 @@ def test_org_supervisor_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup( - user["username"], project["id"], org_id=project["organization"] - ) + self._test_can_get_project_backup(user["username"], project["id"]) # Org supervisor that not in [project:owner, project:assignee] cannot get project backup. def test_org_supervisor_cannot_get_project_backup( @@ -267,9 +261,7 @@ def test_org_supervisor_cannot_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_cannot_get_project_backup( - user["username"], project["id"], org_id=project["organization"] - ) + self._test_cannot_get_project_backup(user["username"], project["id"]) # Org maintainer that not in [project:owner, project:assignee] can get project backup. def test_org_maintainer_can_get_project_backup( @@ -285,9 +277,7 @@ def test_org_maintainer_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup( - user["username"], project["id"], org_id=project["organization"] - ) + self._test_can_get_project_backup(user["username"], project["id"]) # Org owner that not in [project:owner, project:assignee] can get project backup. 
def test_org_owner_can_get_project_backup( @@ -303,9 +293,7 @@ def test_org_owner_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup( - user["username"], project["id"], org_id=project["organization"] - ) + self._test_can_get_project_backup(user["username"], project["id"]) @pytest.mark.usefixtures("restore_db_per_function") @@ -402,10 +390,10 @@ def test_cannot_create_project_with_same_labels(self, admin_user): "name": "test cannot create project with same labels", "labels": [{"name": "l1"}, {"name": "l1"}], } - response = post_method(admin_user, "/projects", project_spec) + response = post_method(admin_user, "projects", project_spec) assert response.status_code == HTTPStatus.BAD_REQUEST - response = get_method(admin_user, "/projects") + response = get_method(admin_user, "projects") assert response.status_code == HTTPStatus.OK def test_cannot_create_project_with_same_skeleton_sublabels(self, admin_user): @@ -415,10 +403,10 @@ def test_cannot_create_project_with_same_skeleton_sublabels(self, admin_user): {"name": "s1", "type": "skeleton", "sublabels": [{"name": "1"}, {"name": "1"}]} ], } - response = post_method(admin_user, "/projects", project_spec) + response = post_method(admin_user, "projects", project_spec) assert response.status_code == HTTPStatus.BAD_REQUEST - response = get_method(admin_user, "/projects") + response = get_method(admin_user, "projects") assert response.status_code == HTTPStatus.OK @pytest.mark.parametrize( @@ -442,7 +430,7 @@ def test_user_cannot_create_project_with_cloud_storage_without_access( }, } - response = post_method(user, "/projects", project_spec) + response = post_method(user, "projects", project_spec) assert response.status_code == HTTPStatus.FORBIDDEN @@ -485,11 +473,13 @@ def _test_import_project(self, username, project_id, format_name, data): _content_type="multipart/form-data", ) assert response.status == HTTPStatus.ACCEPTED + rq_id = json.loads(response.data).get("rq_id") + assert rq_id, "The rq_id was not found in the response" while True: # TODO: It's better be refactored to a separate endpoint to get request status (_, response) = api_client.projects_api.retrieve_dataset( - project_id, action="import_status" + project_id, action="import_status", rq_id=rq_id ) if response.status == HTTPStatus.CREATED: break @@ -663,7 +653,7 @@ def test_can_import_export_annotations_with_rotation(self): self._test_import_project(username, project_id, "CVAT 1.1", import_data) - response = get_method(username, f"/tasks", project_id=project_id) + response = get_method(username, f"tasks", project_id=project_id) assert response.status_code == HTTPStatus.OK tasks = response.json()["results"] @@ -698,7 +688,7 @@ def test_can_delete_label(self, projects, labels, admin_user): label_payload = {"id": label["id"], "deleted": True} response = patch_method( - admin_user, f'/projects/{project["id"]}', {"labels": [label_payload]} + admin_user, f'projects/{project["id"]}', {"labels": [label_payload]} ) assert response.status_code == HTTPStatus.OK, response.content assert response.json()["labels"]["count"] == project["labels"]["count"] - 1 @@ -720,7 +710,7 @@ def test_can_delete_skeleton_label(self, projects, labels, admin_user): label_payload = {"id": label["id"], "deleted": True} response = patch_method( - admin_user, f'/projects/{project["id"]}', {"labels": [label_payload]} + admin_user, f'projects/{project["id"]}', {"labels": [label_payload]} ) assert response.status_code == HTTPStatus.OK assert 
response.json()["labels"]["count"] == project["labels"]["count"] - 1 @@ -734,7 +724,7 @@ def test_can_rename_label(self, projects, labels, admin_user): project_labels[0].update({"name": "new name"}) response = patch_method( - admin_user, f'/projects/{project["id"]}', {"labels": [project_labels[0]]} + admin_user, f'projects/{project["id"]}', {"labels": [project_labels[0]]} ) assert response.status_code == HTTPStatus.OK @@ -749,7 +739,7 @@ def test_cannot_rename_label_to_duplicate_name(self, projects, labels, admin_use label_payload = {"id": project_labels[0]["id"], "name": project_labels[0]["name"]} response = patch_method( - admin_user, f'/projects/{project["id"]}', {"labels": [label_payload]} + admin_user, f'projects/{project["id"]}', {"labels": [label_payload]} ) assert response.status_code == HTTPStatus.BAD_REQUEST assert "All label names must be unique" in response.text @@ -758,7 +748,7 @@ def test_cannot_add_foreign_label(self, projects, labels, admin_user): project = list(projects)[0] new_label = deepcopy([l for l in labels if l.get("project_id") != project["id"]][0]) - response = patch_method(admin_user, f'/projects/{project["id"]}', {"labels": [new_label]}) + response = patch_method(admin_user, f'projects/{project["id"]}', {"labels": [new_label]}) assert response.status_code == HTTPStatus.NOT_FOUND assert f"Not found label with id #{new_label['id']} to change" in response.text @@ -766,7 +756,7 @@ def test_admin_can_add_label(self, projects, admin_user): project = list(projects)[0] new_label = {"name": "new name"} - response = patch_method(admin_user, f'/projects/{project["id"]}', {"labels": [new_label]}) + response = patch_method(admin_user, f'projects/{project["id"]}', {"labels": [new_label]}) assert response.status_code == HTTPStatus.OK assert response.json()["labels"]["count"] == project["labels"]["count"] + 1 @@ -792,9 +782,8 @@ def test_non_project_staff_privileged_org_members_can_add_label( new_label = {"name": "new name"} response = patch_method( user["username"], - f'/projects/{project["id"]}', + f'projects/{project["id"]}', {"labels": [new_label]}, - org_id=project["organization"], ) assert response.status_code == HTTPStatus.OK assert response.json()["labels"]["count"] == project["labels"]["count"] + 1 @@ -821,9 +810,8 @@ def test_non_project_staff_org_members_cannot_add_label( new_label = {"name": "new name"} response = patch_method( user["username"], - f'/projects/{project["id"]}', + f'projects/{project["id"]}', {"labels": [new_label]}, - org_id=project["organization"], ) assert response.status_code == HTTPStatus.FORBIDDEN @@ -846,9 +834,8 @@ def test_project_staff_org_members_can_add_label( new_label = {"name": "new name"} response = patch_method( user["username"], - f'/projects/{project["id"]}', + f'projects/{project["id"]}', {"labels": [new_label]}, - org_id=project["organization"], ) assert response.status_code == HTTPStatus.OK assert response.json()["labels"]["count"] == project["labels"]["count"] + 1 @@ -869,9 +856,7 @@ def test_admin_can_add_skeleton(self, projects, admin_user): 'data-element-id="1" data-node-id="1" data-label-name="597501">', } - response = patch_method( - admin_user, f'/projects/{project["id"]}', {"labels": [new_skeleton]} - ) + response = patch_method(admin_user, f'projects/{project["id"]}', {"labels": [new_skeleton]}) assert response.status_code == HTTPStatus.OK assert response.json()["labels"]["count"] == project["labels"]["count"] + 1 @@ -1015,7 +1000,7 @@ def test_user_cannot_update_project_with_cloud_storage_without_access( project_spec = 
{ "name": f"Project with foreign cloud storage {storage_id} settings", } - response = post_method(user, "/projects", project_spec) + response = post_method(user, "projects", project_spec) updated_fields = { field: { @@ -1025,5 +1010,5 @@ def test_user_cannot_update_project_with_cloud_storage_without_access( } project_id = response.json()["id"] - response = patch_method(user, f"/projects/{project_id}", updated_fields) + response = patch_method(user, f"projects/{project_id}", updated_fields) assert response.status_code == HTTPStatus.FORBIDDEN diff --git a/tests/python/rest_api/test_resource_import_export.py b/tests/python/rest_api/test_resource_import_export.py index a079c3972338..78b1d0cf1b26 100644 --- a/tests/python/rest_api/test_resource_import_export.py +++ b/tests/python/rest_api/test_resource_import_export.py @@ -15,7 +15,7 @@ ) from shared.utils.s3 import make_client as make_s3_client -from .utils import _test_create_task +from .utils import create_task # https://docs.pytest.org/en/7.1.x/example/markers.html#marking-whole-classes-or-modules pytestmark = [pytest.mark.with_external_services] @@ -135,7 +135,7 @@ def test_user_cannot_export_to_cloud_storage_with_specific_location_without_acce user = regular_lonely_user project_spec = {"name": "Test project"} - project = post_method(user, "/projects", project_spec).json() + project = post_method(user, "projects", project_spec).json() project_id = project["id"] task_spec = { @@ -152,11 +152,9 @@ def test_user_cannot_export_to_cloud_storage_with_specific_location_without_acce "server_files": ["images/image_1.jpg"], "project_id": project_id, } - (task_id, _) = _test_create_task( - user, task_spec, data_spec, content_type="application/json" - ) + (task_id, _) = create_task(user, task_spec, data_spec) - jobs = get_method(user, "/jobs", task_id=task_id).json()["results"] + jobs = get_method(user, "jobs", task_id=task_id).json()["results"] job_id = jobs[0]["id"] if obj == "projects": @@ -283,7 +281,7 @@ def test_user_cannot_import_from_cloud_storage_with_specific_location_without_ac user = regular_lonely_user project_spec = {"name": "Test project"} - project = post_method(user, "/projects", project_spec).json() + project = post_method(user, "projects", project_spec).json() project_id = project["id"] task_spec = { @@ -300,11 +298,9 @@ def test_user_cannot_import_from_cloud_storage_with_specific_location_without_ac "server_files": ["images/image_1.jpg"], "project_id": project_id, } - (task_id, _) = _test_create_task( - user, task_spec, data_spec, content_type="application/json" - ) + (task_id, _) = create_task(user, task_spec, data_spec) - jobs = get_method(user, "/jobs", task_id=task_id).json()["results"] + jobs = get_method(user, "jobs", task_id=task_id).json()["results"] job_id = jobs[0]["id"] if obj == "projects": diff --git a/tests/python/rest_api/test_tasks.py b/tests/python/rest_api/test_tasks.py index d680e1589d1b..9300dafd2731 100644 --- a/tests/python/rest_api/test_tasks.py +++ b/tests/python/rest_api/test_tasks.py @@ -11,8 +11,10 @@ from functools import partial from http import HTTPStatus from itertools import chain, product +from math import ceil from pathlib import Path -from tempfile import TemporaryDirectory +from tempfile import NamedTemporaryFile, TemporaryDirectory +from time import sleep, time from typing import List, Optional import pytest @@ -21,10 +23,12 @@ from cvat_sdk.api_client.api_client import ApiClient, Endpoint from cvat_sdk.core.helpers import get_paginated_collection from cvat_sdk.core.proxies.tasks import 
ResourceType, Task +from cvat_sdk.core.uploading import Uploader from deepdiff import DeepDiff from PIL import Image import shared.utils.s3 as s3 +from shared.fixtures.init import docker_exec_cvat, kube_exec_cvat from shared.utils.config import ( BASE_URL, USER_PASS, @@ -37,7 +41,7 @@ from .utils import ( CollectionSimpleFilterTestBase, - _test_create_task, + create_task, export_dataset, wait_until_task_is_created, ) @@ -614,23 +618,19 @@ def test_can_create_task_with_defined_start_and_stop_frames(self): "client_files": generate_image_files(7), } - task_id, _ = _test_create_task( - self._USERNAME, task_spec, task_data, content_type="multipart/form-data" - ) + task_id, _ = create_task(self._USERNAME, task_spec, task_data) # check task size with make_api_client(self._USERNAME) as api_client: (task, _) = api_client.tasks_api.retrieve(task_id) assert task.size == 4 - def test_can_create_task_with_sorting_method(self): + def test_can_create_task_with_sorting_method_natural(self): task_spec = { "name": f"test {self._USERNAME} to create a task with a custom sorting method", "labels": [ { "name": "car", - "color": "#ff00ff", - "attributes": [], } ], } @@ -643,11 +643,7 @@ def test_can_create_task_with_sorting_method(self): "sorting_method": "natural", } - # Besides testing that the sorting method is applied, this also checks for - # regressions of . - task_id, _ = _test_create_task( - self._USERNAME, task_spec, task_data, content_type="multipart/form-data" - ) + task_id, _ = create_task(self._USERNAME, task_spec, task_data) # check that the frames were sorted again with make_api_client(self._USERNAME) as api_client: @@ -657,6 +653,48 @@ def test_can_create_task_with_sorting_method(self): for image_file, frame in zip(image_files, data_meta.frames): assert image_file.name == frame.name + @pytest.mark.parametrize("data_source", ["client_files", "server_files"]) + def test_can_create_task_with_sorting_method_predefined(self, data_source): + task_spec = { + "name": f"test {self._USERNAME} to create a task with a custom sorting method", + "labels": [ + { + "name": "car", + } + ], + } + + if data_source == "client_files": + image_files = generate_image_files(15) + + # shuffle to check for occasional sorting, e.g. 
in the DB + image_files = image_files[7:] + image_files[5:7] + image_files[:5] + elif data_source == "server_files": + # Files from the test file share + image_files = ["images/image_3.jpg", "images/image_1.jpg", "images/image_2.jpg"] + else: + assert False + + task_data = { + data_source: image_files, + "image_quality": 70, + "sorting_method": "predefined", + } + + (task_id, _) = create_task(self._USERNAME, task_spec, task_data) + + # check that the frames were sorted again + with make_api_client(self._USERNAME) as api_client: + (data_meta, _) = api_client.tasks_api.retrieve_data_meta(task_id) + + for image_file, frame in zip(image_files, data_meta.frames): + if isinstance(image_file, str): + image_name = image_file + else: + image_name = image_file.name + + assert image_name == frame.name + def test_can_get_annotations_from_new_task_with_skeletons(self): spec = { "name": f"test admin1 to create a task with skeleton", @@ -685,9 +723,7 @@ def test_can_get_annotations_from_new_task_with_skeletons(self): "client_files": generate_image_files(3), } - task_id, _ = _test_create_task( - self._USERNAME, spec, task_data, content_type="multipart/form-data" - ) + task_id, _ = create_task(self._USERNAME, spec, task_data) response = get_method(self._USERNAME, "labels", task_id=f"{task_id}") label_ids = {} @@ -862,9 +898,7 @@ def test_create_task_with_cloud_storage_files( } kwargs = {"org": org} if org else {} - _test_create_task( - self._USERNAME, task_spec, data_spec, content_type="application/json", **kwargs - ) + create_task(self._USERNAME, task_spec, data_spec, **kwargs) @pytest.mark.with_external_services @pytest.mark.parametrize("cloud_storage_id", [2]) @@ -961,9 +995,7 @@ def test_create_task_with_cloud_storage_directories_and_excluded_files( if server_files_exclude: data_spec["server_files_exclude"] = server_files_exclude - task_id, _ = _test_create_task( - self._USERNAME, task_spec, data_spec, content_type="application/json", org=org - ) + task_id, _ = create_task(self._USERNAME, task_spec, data_spec, org=org) with make_api_client(self._USERNAME) as api_client: (task, response) = api_client.tasks_api.retrieve(task_id) @@ -1028,7 +1060,7 @@ def test_user_cannot_create_task_with_cloud_storage_without_access( assert False with pytest.raises(exceptions.ApiException) as capture: - _test_create_task(user, task_spec, data_spec, content_type="application/json") + create_task(user, task_spec, data_spec) assert capture.value.status == HTTPStatus.FORBIDDEN @@ -1127,9 +1159,7 @@ def test_create_task_with_file_pattern( data_spec["server_files"] = [f"test/sub/{manifest}" if sub_dir else manifest] if task_size: - task_id, _ = _test_create_task( - self._USERNAME, task_spec, data_spec, content_type="application/json" - ) + task_id, _ = create_task(self._USERNAME, task_spec, data_spec) with make_api_client(self._USERNAME) as api_client: (task, response) = api_client.tasks_api.retrieve(task_id) @@ -1159,7 +1189,7 @@ def test_can_specify_file_job_mapping(self): "job_file_mapping": expected_segments, } - task_id, _ = _test_create_task( + task_id, _ = create_task( self._USERNAME, task_spec, data_spec, content_type="application/json" ) @@ -1187,10 +1217,10 @@ def test_cannot_create_task_with_same_labels(self): "name": "test cannot create task with same labels", "labels": [{"name": "l1"}, {"name": "l1"}], } - response = post_method(self._USERNAME, "/tasks", task_spec) + response = post_method(self._USERNAME, "tasks", task_spec) assert response.status_code == HTTPStatus.BAD_REQUEST - response = 
get_method(self._USERNAME, "/tasks") + response = get_method(self._USERNAME, "tasks") assert response.status_code == HTTPStatus.OK def test_cannot_create_task_with_same_skeleton_sublabels(self): @@ -1200,10 +1230,10 @@ def test_cannot_create_task_with_same_skeleton_sublabels(self): {"name": "s1", "type": "skeleton", "sublabels": [{"name": "1"}, {"name": "1"}]} ], } - response = post_method(self._USERNAME, "/tasks", task_spec) + response = post_method(self._USERNAME, "tasks", task_spec) assert response.status_code == HTTPStatus.BAD_REQUEST - response = get_method(self._USERNAME, "/tasks") + response = get_method(self._USERNAME, "tasks") assert response.status_code == HTTPStatus.OK @@ -1221,7 +1251,7 @@ def test_can_delete_label(self, tasks, labels, admin_user): label = deepcopy([l for l in labels if l.get("task_id") == task["id"]][0]) label_payload = {"id": label["id"], "deleted": True} - response = patch_method(admin_user, f'/tasks/{task["id"]}', {"labels": [label_payload]}) + response = patch_method(admin_user, f'tasks/{task["id"]}', {"labels": [label_payload]}) assert response.status_code == HTTPStatus.OK, response.content assert response.json()["labels"]["count"] == task["labels"]["count"] - 1 @@ -1241,7 +1271,7 @@ def test_can_delete_skeleton_label(self, tasks, labels, admin_user): task_labels.remove(label) label_payload = {"id": label["id"], "deleted": True} - response = patch_method(admin_user, f'/tasks/{task["id"]}', {"labels": [label_payload]}) + response = patch_method(admin_user, f'tasks/{task["id"]}', {"labels": [label_payload]}) assert response.status_code == HTTPStatus.OK assert response.json()["labels"]["count"] == task["labels"]["count"] - 1 @@ -1253,7 +1283,7 @@ def test_can_rename_label(self, tasks, labels, admin_user): task_labels = deepcopy([l for l in labels if l.get("task_id") == task["id"]]) task_labels[0].update({"name": "new name"}) - response = patch_method(admin_user, f'/tasks/{task["id"]}', {"labels": [task_labels[0]]}) + response = patch_method(admin_user, f'tasks/{task["id"]}', {"labels": [task_labels[0]]}) assert response.status_code == HTTPStatus.OK resulting_labels = self._get_task_labels(task["id"], admin_user) @@ -1266,7 +1296,7 @@ def test_cannot_rename_label_to_duplicate_name(self, tasks, labels, admin_user): label_payload = {"id": task_labels[0]["id"], "name": task_labels[0]["name"]} - response = patch_method(admin_user, f'/tasks/{task["id"]}', {"labels": [label_payload]}) + response = patch_method(admin_user, f'tasks/{task["id"]}', {"labels": [label_payload]}) assert response.status_code == HTTPStatus.BAD_REQUEST assert "All label names must be unique" in response.text @@ -1281,7 +1311,7 @@ def test_cannot_add_foreign_label(self, tasks, labels, admin_user): ][0] ) - response = patch_method(admin_user, f'/tasks/{task["id"]}', {"labels": [new_label]}) + response = patch_method(admin_user, f'tasks/{task["id"]}', {"labels": [new_label]}) assert response.status_code == HTTPStatus.NOT_FOUND assert f"Not found label with id #{new_label['id']} to change" in response.text @@ -1289,7 +1319,7 @@ def test_admin_can_add_label(self, tasks, admin_user): task = [t for t in tasks if t["project_id"] is None][0] new_label = {"name": "new name"} - response = patch_method(admin_user, f'/tasks/{task["id"]}', {"labels": [new_label]}) + response = patch_method(admin_user, f'tasks/{task["id"]}', {"labels": [new_label]}) assert response.status_code == HTTPStatus.OK assert response.json()["labels"]["count"] == task["labels"]["count"] + 1 @@ -1315,9 +1345,8 @@ def 
test_non_task_staff_privileged_org_members_can_add_label( new_label = {"name": "new name"} response = patch_method( user["username"], - f'/tasks/{task["id"]}', + f'tasks/{task["id"]}', {"labels": [new_label]}, - org_id=task["organization"], ) assert response.status_code == HTTPStatus.OK assert response.json()["labels"]["count"] == task["labels"]["count"] + 1 @@ -1344,9 +1373,8 @@ def test_non_task_staff_org_members_cannot_add_label( new_label = {"name": "new name"} response = patch_method( user["username"], - f'/tasks/{task["id"]}', + f'tasks/{task["id"]}', {"labels": [new_label]}, - org_id=task["organization"], ) assert response.status_code == HTTPStatus.FORBIDDEN @@ -1369,9 +1397,8 @@ def test_task_staff_org_members_can_add_label( new_label = {"name": "new name"} response = patch_method( user["username"], - f'/tasks/{task["id"]}', + f'tasks/{task["id"]}', {"labels": [new_label]}, - org_id=task["organization"], ) assert response.status_code == HTTPStatus.OK assert response.json()["labels"]["count"] == task["labels"]["count"] + 1 @@ -1392,7 +1419,7 @@ def test_admin_can_add_skeleton(self, tasks, admin_user): 'data-element-id="1" data-node-id="1" data-label-name="597501">', } - response = patch_method(admin_user, f'/tasks/{task["id"]}', {"labels": [new_skeleton]}) + response = patch_method(admin_user, f'tasks/{task["id"]}', {"labels": [new_skeleton]}) assert response.status_code == HTTPStatus.OK assert response.json()["labels"]["count"] == task["labels"]["count"] + 1 @@ -1425,9 +1452,7 @@ def test_work_with_task_containing_non_stable_cloud_storage_files( "server_files": cloud_storage_content, } - task_id, _ = _test_create_task( - self._USERNAME, task_spec, data_spec, content_type="application/json" - ) + task_id, _ = create_task(self._USERNAME, task_spec, data_spec) # save image from the "public" bucket and remove it temporary @@ -1690,9 +1715,7 @@ def test_user_cannot_update_task_with_cloud_storage_without_access( "use_cache": True, "server_files": ["images/image_1.jpg"], } - (task_id, _) = _test_create_task( - user, task_spec, data_spec, content_type="application/json" - ) + (task_id, _) = create_task(user, task_spec, data_spec) updated_fields = { field: { @@ -1729,3 +1752,110 @@ def test_can_report_correct_completed_jobs_count(tasks, jobs, admin_user): task, _ = api_client.tasks_api.retrieve(task["id"]) assert task.jobs.completed == 1 + + +class TestImportTaskAnnotations: + def _make_client(self) -> Client: + return Client(BASE_URL, config=Config(status_check_period=0.01)) + + @pytest.fixture(autouse=True) + def setup(self, restore_db_per_function, tmp_path: Path, admin_user: str): + self.tmp_dir = tmp_path + self.client = self._make_client() + self.user = admin_user + self.format = "COCO 1.0" + + with self.client: + self.client.login((self.user, USER_PASS)) + + def _check_annotations(self, task_id): + with make_api_client(self.user) as api_client: + (_, response) = api_client.tasks_api.retrieve_annotations(id=task_id) + assert response.status == HTTPStatus.OK + annotations = json.loads(response.data)["shapes"] + assert len(annotations) > 0 + + def _delete_annotations(self, task_id): + with make_api_client(self.user) as api_client: + (_, response) = api_client.tasks_api.destroy_annotations(id=task_id) + assert response.status == HTTPStatus.NO_CONTENT + + @pytest.mark.timeout(64) + @pytest.mark.parametrize("successful_upload", [True, False]) + def test_can_import_annotations_after_previous_unclear_import( + self, successful_upload: bool, tasks_with_shapes + ): + task_id = 
tasks_with_shapes[0]["id"] + self._check_annotations(task_id) + + with NamedTemporaryFile() as f: + filename = self.tmp_dir / f"task_{task_id}_{Path(f.name).name}_coco.zip" + + task = self.client.tasks.retrieve(task_id) + task.export_dataset(self.format, filename, include_images=False) + + self._delete_annotations(task_id) + + params = {"format": self.format, "filename": filename.name} + url = self.client.api_map.make_endpoint_url( + self.client.api_client.tasks_api.create_annotations_endpoint.path + ).format(id=task_id) + uploader = Uploader(self.client) + + if successful_upload: + # define time required to upload file with annotations + start_time = time() + task.import_annotations(self.format, filename) + required_time = ceil(time() - start_time) * 2 + self._delete_annotations(task_id) + + response = uploader.upload_file( + url, filename, meta=params, query_params=params, logger=self.client.logger.debug + ) + rq_id = json.loads(response.data)["rq_id"] + assert rq_id + else: + required_time = 54 + uploader._tus_start_upload(url, query_params=params) + uploader._upload_file_data_with_tus( + url, filename, meta=params, logger=self.client.logger.debug + ) + + sleep(required_time) + if successful_upload: + self._check_annotations(task_id) + self._delete_annotations(task_id) + task.import_annotations(self.format, filename) + self._check_annotations(task_id) + + @pytest.mark.timeout(64) + def test_check_import_cache_after_previous_interrupted_upload(self, tasks_with_shapes, request): + task_id = tasks_with_shapes[0]["id"] + with NamedTemporaryFile() as f: + filename = self.tmp_dir / f"task_{task_id}_{Path(f.name).name}_coco.zip" + task = self.client.tasks.retrieve(task_id) + task.export_dataset(self.format, filename, include_images=False) + + params = {"format": self.format, "filename": filename.name} + url = self.client.api_map.make_endpoint_url( + self.client.api_client.tasks_api.create_annotations_endpoint.path + ).format(id=task_id) + + uploader = Uploader(self.client) + uploader._tus_start_upload(url, query_params=params) + uploader._upload_file_data_with_tus( + url, filename, meta=params, logger=self.client.logger.debug + ) + number_of_files = 1 + sleep(30) # wait when the cleaning job from rq worker will be started + command = ["/bin/bash", "-c", f"ls data/tasks/{task_id}/tmp | wc -l"] + platform = request.config.getoption("--platform") + assert platform in ("kube", "local") + func = docker_exec_cvat if platform == "local" else kube_exec_cvat + for _ in range(12): + sleep(2) + result, _ = func(command) + number_of_files = int(result) + if not number_of_files: + break + assert not number_of_files diff --git a/tests/python/rest_api/test_webhooks.py b/tests/python/rest_api/test_webhooks.py index 91086d31e27f..a29bc0c19a9e 100644 --- a/tests/python/rest_api/test_webhooks.py +++ b/tests/python/rest_api/test_webhooks.py @@ -773,6 +773,14 @@ def test_member_can_see_list_of_project_webhooks_in_org( assert response.status_code == HTTPStatus.OK assert DeepDiff(expected_response, response.json()["results"], ignore_order=True) == {} + @pytest.mark.parametrize("field_value, query_value", [(1, 1), (None, "")]) + def test_can_filter_by_org_id(self, field_value, query_value, webhooks): + webhooks = filter(lambda w: w["organization"] == field_value, webhooks) + response = get_method("admin2", f"webhooks", org_id=query_value) + + assert response.status_code == HTTPStatus.OK + assert DeepDiff(list(webhooks), response.json()["results"], ignore_order=True) == {} + 
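Editorial note (not part of the patch): the interrupted-upload test added to `TestImportTaskAnnotations` above waits for the new `cleaning` RQ worker by repeatedly counting the files left in the task's `tmp` directory inside the server container. Its retry loop boils down to the generic helper sketched below; the name, attempt count, and interval are illustrative, not APIs introduced by this patch.

```python
from time import sleep
from typing import Callable


def wait_until_empty(count_leftovers: Callable[[], int],
                     attempts: int = 12, interval: float = 2.0) -> bool:
    """Poll a counter (e.g. `ls data/tasks/<id>/tmp | wc -l`) until it reaches zero."""
    for _ in range(attempts):
        sleep(interval)
        if count_leftovers() == 0:
            return True
    return False
```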
@pytest.mark.usefixtures("restore_db_per_function") class TestPatchWebhooks: diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index d32f76b3c61c..e7cca896ae6a 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -140,12 +140,28 @@ def wait_until_task_is_created(api: apis.TasksApi, task_id: int) -> models.RqSta raise Exception("Cannot create task") -def _test_create_task(username, spec, data, content_type, **kwargs): +def create_task(username, spec, data, content_type="application/json", **kwargs): with make_api_client(username) as api_client: (task, response_) = api_client.tasks_api.create(spec, **kwargs) assert response_.status == HTTPStatus.CREATED + sent_upload_start = False + + data_kwargs = (kwargs or {}).copy() + data_kwargs.pop("org", None) + data_kwargs.pop("org_id", None) + if data.get("client_files") and "json" in content_type: + (_, response) = api_client.tasks_api.create_data( + task.id, + data_request=models.DataRequest(image_quality=data["image_quality"]), + upload_start=True, + _content_type=content_type, + **data_kwargs, + ) + assert response.status == HTTPStatus.ACCEPTED + sent_upload_start = True + # Can't encode binary files in json (_, response) = api_client.tasks_api.create_data( task.id, @@ -155,19 +171,27 @@ def _test_create_task(username, spec, data, content_type, **kwargs): ), upload_multiple=True, _content_type="multipart/form-data", - **kwargs, + **data_kwargs, ) assert response.status == HTTPStatus.OK data = data.copy() del data["client_files"] + last_kwargs = {} + if sent_upload_start: + last_kwargs["upload_finish"] = True + (_, response) = api_client.tasks_api.create_data( - task.id, data_request=deepcopy(data), _content_type=content_type + task.id, + data_request=deepcopy(data), + _content_type=content_type, + **data_kwargs, + **last_kwargs, ) assert response.status == HTTPStatus.ACCEPTED status = wait_until_task_is_created(api_client.tasks_api, task.id) - assert status.state.value == "Finished" + assert status.state.value == "Finished", status.message return task.id, response_.headers.get("X-Request-Id") diff --git a/tests/python/sdk/test_tasks.py b/tests/python/sdk/test_tasks.py index 3a8faeddef45..8a5bbf8b5a58 100644 --- a/tests/python/sdk/test_tasks.py +++ b/tests/python/sdk/test_tasks.py @@ -4,6 +4,7 @@ import io import json +import os.path as osp import zipfile from logging import Logger from pathlib import Path @@ -64,6 +65,17 @@ def fxt_new_task(self, fxt_image_file: Path): return task + @pytest.fixture + def fxt_new_task_without_data(self): + task = self.client.tasks.create( + spec={ + "name": "test_task", + "labels": [{"name": "car"}, {"name": "person"}], + }, + ) + + return task + @pytest.fixture def fxt_task_with_shapes(self, fxt_new_task: Task): labels = fxt_new_task.get_labels() @@ -118,7 +130,6 @@ def test_can_create_task_with_local_data(self): task = self.client.tasks.create_from_data( spec=task_spec, data_params=data_params, - resource_type=ResourceType.LOCAL, resources=task_files, pbar=pbar, ) @@ -127,6 +138,27 @@ def test_can_create_task_with_local_data(self): assert "100%" in pbar_out.getvalue().strip("\r").split("\r")[-1] assert self.stdout.getvalue() == "" + def test_can_create_task_with_local_data_and_predefined_sorting( + self, fxt_new_task_without_data: Task + ): + task = fxt_new_task_without_data + + task_files = generate_image_files(6) + task_filenames = [] + for f in task_files: + fname = self.tmp_path / osp.basename(f.name) + fname.write_bytes(f.getvalue()) + 
task_filenames.append(fname) + + task_filenames = [task_filenames[i] for i in [2, 4, 1, 5, 0, 3]] + + task.upload_data( + resources=task_filenames, + params={"sorting_method": "predefined"}, + ) + + assert [f.name for f in task.get_frames_info()] == [f.name for f in task_filenames] + def test_can_create_task_with_remote_data(self): task = self.client.tasks.create_from_data( spec={ @@ -367,7 +399,7 @@ def _test_can_create_from_backup(self, fxt_new_task: Task, fxt_backup_file: Path assert task.id assert task.id != fxt_new_task.id assert task.size == fxt_new_task.size - assert "imported sucessfully" in self.logger_stream.getvalue() + assert "imported successfully" in self.logger_stream.getvalue() assert "100%" in pbar_out.getvalue().strip("\r").split("\r")[-1] assert self.stdout.getvalue() == "" diff --git a/tests/python/shared/fixtures/init.py b/tests/python/shared/fixtures/init.py index c2fd87671521..792fe36825f2 100644 --- a/tests/python/shared/fixtures/init.py +++ b/tests/python/shared/fixtures/init.py @@ -8,6 +8,7 @@ from pathlib import Path from subprocess import PIPE, CalledProcessError, run from time import sleep +from typing import List, Union import pytest import requests @@ -23,7 +24,6 @@ CONTAINER_NAME_FILES = ["docker-compose.tests.yml"] - DC_FILES = [ "docker-compose.dev.yml", "tests/docker-compose.file_share.yml", @@ -85,7 +85,7 @@ def _run(command, capture_output=True): proc = run(_command, check=True) # nosec return stdout, stderr except CalledProcessError as exc: - stderr = exc.stderr.decode() if capture_output else "see above" + stderr = exc.stderr.decode() or exc.stdout.decode() if capture_output else "see above" pytest.exit( f"Command failed: {command}.\n" f"Error message: {stderr}.\n" @@ -120,13 +120,17 @@ def kube_cp(source, target): _run(f"kubectl cp {source} {target}") -def docker_exec_cvat(command): - _run(f"docker exec {PREFIX}_cvat_server_1 {command}") +def docker_exec_cvat(command: Union[List[str], str]): + base = f"docker exec {PREFIX}_cvat_server_1" + _command = f"{base} {command}" if isinstance(command, str) else base.split() + command + return _run(_command) -def kube_exec_cvat(command): +def kube_exec_cvat(command: Union[List[str], str]): pod_name = _kube_get_server_pod_name() - _run(f"kubectl exec {pod_name} -- {command}") + base = f"kubectl exec {pod_name} --" + _command = f"{base} {command}" if isinstance(command, str) else base.split() + command + return _run(_command) def docker_exec_cvat_db(command): @@ -211,7 +215,7 @@ def create_compose_files(container_name_files): for service_name, service_config in dc_config["services"].items(): service_config.pop("container_name", None) - if service_name == "cvat_server": + if service_name in ("cvat_server", "cvat_utils"): service_env = service_config["environment"] service_env["DJANGO_SETTINGS_MODULE"] = "cvat.settings.testing_rest" diff --git a/tests/python/shared/utils/config.py b/tests/python/shared/utils/config.py index f5a3206c5aff..0e9669fce5d0 100644 --- a/tests/python/shared/utils/config.py +++ b/tests/python/shared/utils/config.py @@ -58,6 +58,10 @@ def post_files_method(username, endpoint, data, files, **kwargs): ) +def put_method(username, endpoint, data, **kwargs): + return requests.put(get_api_url(endpoint, **kwargs), json=data, auth=(username, USER_PASS)) + + def server_get(username, endpoint, **kwargs): return requests.get(get_server_url(endpoint, **kwargs), auth=(username, USER_PASS)) diff --git a/tests/python/shared/utils/resource_import_export.py 
b/tests/python/shared/utils/resource_import_export.py index 5adf8aecf7c4..9ee8bdec26e7 100644 --- a/tests/python/shared/utils/resource_import_export.py +++ b/tests/python/shared/utils/resource_import_export.py @@ -9,7 +9,7 @@ T = TypeVar("T") -from shared.utils.config import get_method, post_method +from shared.utils.config import get_method, post_method, put_method FILENAME_TEMPLATE = "cvat/{}/{}.zip" EXPORT_FORMAT = "CVAT for images 1.1" @@ -117,9 +117,16 @@ def _import_annotations_from_cloud_storage( response = post_method(user, url, data=None, **kwargs) status = response.status_code + # Only the first POST request contains rq_id in response. + # Exclude cases with 403 expected status. + rq_id = None + if status == HTTPStatus.ACCEPTED: + rq_id = response.json().get("rq_id") + assert rq_id, "The rq_id was not found in the response" + while status != _expect_status: assert status == HTTPStatus.ACCEPTED - response = post_method(user, url, data=None, **kwargs) + response = put_method(user, url, data=None, rq_id=rq_id, **kwargs) status = response.status_code if _check_uploaded: @@ -154,9 +161,16 @@ def _import_dataset_from_cloud_storage( response = post_method(user, url, data=None, **kwargs) status = response.status_code + # Only the first POST request contains rq_id in response. + # Exclude cases with 403 expected status. + rq_id = None + if status == HTTPStatus.ACCEPTED: + rq_id = response.json().get("rq_id") + assert rq_id, "The rq_id was not found in the response" + while status != _expect_status: assert status == HTTPStatus.ACCEPTED - response = get_method(user, url, action="import_status") + response = get_method(user, url, action="import_status", rq_id=rq_id) status = response.status_code def _import_resource(self, cloud_storage: Dict[str, Any], resource_type: str, *args, **kwargs): diff --git a/tests/values.test.yaml b/tests/values.test.yaml index a4d90fc12428..e281ecb32143 100644 --- a/tests/values.test.yaml +++ b/tests/values.test.yaml @@ -14,6 +14,10 @@ cvat: - mountPath: /home/django/share name: cvat-backend-data subPath: share + utils: + additionalEnv: + - name: DJANGO_SETTINGS_MODULE + value: cvat.settings.testing_rest # Images are already present in the node imagePullPolicy: Never frontend: diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index 7040eb7cc42c..1be0976849c6 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -790,13 +790,13 @@ def _validate_first_item(_dict): # raise InvalidManifestError('Incorrect height field') def is_manifest(full_manifest_path): - return _is_video_manifest(full_manifest_path) or \ - _is_dataset_manifest(full_manifest_path) + return is_video_manifest(full_manifest_path) or \ + is_dataset_manifest(full_manifest_path) -def _is_video_manifest(full_manifest_path): +def is_video_manifest(full_manifest_path): validator = _VideoManifestStructureValidator(full_manifest_path) return validator.validate() -def _is_dataset_manifest(full_manifest_path): +def is_dataset_manifest(full_manifest_path): validator = _DatasetManifestStructureValidator(full_manifest_path) return validator.validate() diff --git a/utils/dataset_manifest/create.py b/utils/dataset_manifest/create.py index e52c4e6acf8b..64efaed60f2d 100755 --- a/utils/dataset_manifest/create.py +++ b/utils/dataset_manifest/create.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # Copyright (C) 2021-2022 Intel Corporation -# Copyright (C) 2022 CVAT.ai Corporation +# Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT @@ -10,9 
+10,10 @@ import sys import re from glob import glob + from tqdm import tqdm -from utils import detect_related_images, is_image, is_video +from utils import detect_related_images, is_image, is_video, SortingMethod def get_args(): parser = argparse.ArgumentParser() @@ -21,8 +22,8 @@ def get_args(): 'if by default the video does not meet the requirements and a manifest file is not prepared') parser.add_argument('--output-dir',type=str, help='Directory where the manifest file will be saved', default=os.getcwd()) - parser.add_argument('--sorting', choices=['lexicographical', 'natural', 'predefined', 'random'], - type=str, default='lexicographical') + parser.add_argument('--sorting', choices=[v[0] for v in SortingMethod.choices()], + type=str, default=SortingMethod.LEXICOGRAPHICAL.value) parser.add_argument('source', type=str, help='Source paths') return parser.parse_args()
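
Note: the renamed create_task helper above (tests/python/rest_api/utils.py) now drives the task data upload in up to three phases when client files are declared via JSON: a request with upload_start, a multipart transfer of the binary files, and a closing request with upload_finish. The sketch below restates that flow standalone; it assumes the generated SDK client (cvat_sdk.api_client) and the test helper make_api_client are importable, and the helper name upload_task_data is illustrative, not part of the patch.

from http import HTTPStatus

from cvat_sdk.api_client import models
from shared.utils.config import make_api_client  # test helper, assumed available


def upload_task_data(username, task_id, files, image_quality=70):
    with make_api_client(username) as api_client:
        # Phase 1: announce the upload so the server starts accumulating data.
        (_, response) = api_client.tasks_api.create_data(
            task_id,
            data_request=models.DataRequest(image_quality=image_quality),
            upload_start=True,
        )
        assert response.status == HTTPStatus.ACCEPTED

        # Phase 2: binary files cannot be encoded in JSON, so send them as
        # multipart form data, mirroring the helper above.
        (_, response) = api_client.tasks_api.create_data(
            task_id,
            data_request=models.DataRequest(
                image_quality=image_quality, client_files=files
            ),
            upload_multiple=True,
            _content_type="multipart/form-data",
        )
        assert response.status == HTTPStatus.OK

        # Phase 3: signal that all data has been sent; the server begins processing.
        (_, response) = api_client.tasks_api.create_data(
            task_id,
            data_request=models.DataRequest(image_quality=image_quality),
            upload_finish=True,
        )
        assert response.status == HTTPStatus.ACCEPTED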
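
Note: the import helpers in tests/python/shared/utils/resource_import_export.py above now read rq_id from the first 202 response and pass it to every follow-up status request (PUT for annotations, GET with action="import_status" for datasets). A hedged sketch of that polling loop follows, reusing post_method and put_method from shared.utils.config as shown in this diff; the endpoint path and the terminal 201 status are assumptions for illustration only.

from http import HTTPStatus
from time import sleep

from shared.utils.config import post_method, put_method


def import_task_annotations(user, task_id, **kwargs):
    url = f"tasks/{task_id}/annotations"  # hypothetical endpoint path

    # Only the first POST starts the import and carries rq_id in its response body.
    response = post_method(user, url, data=None, **kwargs)
    assert response.status_code == HTTPStatus.ACCEPTED
    rq_id = response.json().get("rq_id")
    assert rq_id, "The rq_id was not found in the response"

    # Subsequent requests reference the same RQ job until it leaves the ACCEPTED state.
    while response.status_code == HTTPStatus.ACCEPTED:
        sleep(1)
        response = put_method(user, url, data=None, rq_id=rq_id, **kwargs)

    # Assumed terminal status; the real tests compare against an expected-status argument.
    assert response.status_code == HTTPStatus.CREATED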