diff --git a/cvat-core/src/annotations.ts b/cvat-core/src/annotations.ts index 706cbc853ed2..5fd4c17472ab 100644 --- a/cvat-core/src/annotations.ts +++ b/cvat-core/src/annotations.ts @@ -166,6 +166,7 @@ export async function exportDataset( instance, format: string, saveImages: boolean, + onlyAnnotated: boolean, useDefaultSettings: boolean, targetStorage: Storage, name?: string, @@ -177,13 +178,13 @@ export async function exportDataset( let result = null; if (instance instanceof Task) { result = await serverProxy.tasks - .exportDataset(instance.id, format, saveImages, useDefaultSettings, targetStorage, name); + .exportDataset(instance.id, format, saveImages, onlyAnnotated, useDefaultSettings, targetStorage, name); } else if (instance instanceof Job) { result = await serverProxy.jobs - .exportDataset(instance.id, format, saveImages, useDefaultSettings, targetStorage, name); + .exportDataset(instance.id, format, saveImages, onlyAnnotated, useDefaultSettings, targetStorage, name); } else { result = await serverProxy.projects - .exportDataset(instance.id, format, saveImages, useDefaultSettings, targetStorage, name); + .exportDataset(instance.id, format, saveImages, onlyAnnotated, useDefaultSettings, targetStorage, name); } return result; diff --git a/cvat-core/src/project-implementation.ts b/cvat-core/src/project-implementation.ts index 5291dc774318..84fb6f9c722c 100644 --- a/cvat-core/src/project-implementation.ts +++ b/cvat-core/src/project-implementation.ts @@ -111,11 +111,20 @@ export default function implementProject(Project: typeof ProjectClass): typeof P this: ProjectClass, format: Parameters[0], saveImages: Parameters[1], - useDefaultSettings: Parameters[2], - targetStorage: Parameters[3], - customName: Parameters[4], + onlyAnnotated: Parameters[2], + useDefaultSettings: Parameters[3], + targetStorage: Parameters[4], + customName: Parameters[5], ): ReturnType { - const rqID = await exportDataset(this, format, saveImages, useDefaultSettings, targetStorage, 
customName); + const rqID = await exportDataset( + this, + format, + saveImages, + onlyAnnotated, + useDefaultSettings, + targetStorage, + customName, + ); return rqID; }, }); diff --git a/cvat-core/src/project.ts b/cvat-core/src/project.ts index 831fc2b4f5be..4abfdbac5913 100644 --- a/cvat-core/src/project.ts +++ b/cvat-core/src/project.ts @@ -34,6 +34,7 @@ export default class Project { exportDataset: ( format: string, saveImages: boolean, + onlyAnnotated: boolean, useDefaultSettings: boolean, targetStorage: Storage, name?: string, @@ -265,15 +266,17 @@ Object.defineProperties( async exportDataset( format: Parameters[0], saveImages: Parameters[1], - useDefaultSettings: Parameters[2], - targetStorage: Parameters[3], - customName: Parameters[4], + onlyAnnotated: Parameters[2], + useDefaultSettings: Parameters[3], + targetStorage: Parameters[4], + customName: Parameters[5], ) { const result = await PluginRegistry.apiWrapper.call( this, Project.prototype.annotations.exportDataset, format, saveImages, + onlyAnnotated, useDefaultSettings, targetStorage, customName, diff --git a/cvat-core/src/server-proxy.ts b/cvat-core/src/server-proxy.ts index 91dc52a71821..71e1c79a75f6 100644 --- a/cvat-core/src/server-proxy.ts +++ b/cvat-core/src/server-proxy.ts @@ -38,6 +38,7 @@ type Params = { filename?: string, action?: string, save_images?: boolean, + only_annotated?: boolean, }; tus.defaultOptions.storeFingerprintForResuming = false; @@ -805,6 +806,7 @@ function exportDataset(instanceType: 'projects' | 'jobs' | 'tasks') { id: number, format: string, saveImages: boolean, + onlyAnnotated: boolean, useDefaultSettings: boolean, targetStorage: Storage, name?: string, @@ -817,6 +819,7 @@ function exportDataset(instanceType: 'projects' | 'jobs' | 'tasks') { ...(name ? 
{ filename: name } : {}), format, save_images: saveImages, + only_annotated: onlyAnnotated, }; return new Promise((resolve, reject) => { async function request() { diff --git a/cvat-core/src/session-implementation.ts b/cvat-core/src/session-implementation.ts index fa77c934abde..2d5f6002cb0f 100644 --- a/cvat-core/src/session-implementation.ts +++ b/cvat-core/src/session-implementation.ts @@ -513,11 +513,20 @@ export function implementJob(Job: typeof JobClass): typeof JobClass { this: JobClass, format: Parameters[0], saveImages: Parameters[1], - useDefaultSettings: Parameters[2], - targetStorage: Parameters[3], - customName?: Parameters[4], + onlyAnnotated: Parameters[2], + useDefaultSettings: Parameters[3], + targetStorage: Parameters[4], + customName?: Parameters[5], ): ReturnType { - const rqID = await exportDataset(this, format, saveImages, useDefaultSettings, targetStorage, customName); + const rqID = await exportDataset( + this, + format, + saveImages, + onlyAnnotated, + useDefaultSettings, + targetStorage, + customName, + ); return rqID; }, }); @@ -1169,11 +1178,20 @@ export function implementTask(Task: typeof TaskClass): typeof TaskClass { this: TaskClass, format: Parameters[0], saveImages: Parameters[1], - useDefaultSettings: Parameters[2], - targetStorage: Parameters[3], - customName: Parameters[4], + onlyAnnotated: Parameters[2], + useDefaultSettings: Parameters[3], + targetStorage: Parameters[4], + customName: Parameters[5], ): ReturnType { - const rqID = await exportDataset(this, format, saveImages, useDefaultSettings, targetStorage, customName); + const rqID = await exportDataset( + this, + format, + saveImages, + onlyAnnotated, + useDefaultSettings, + targetStorage, + customName, + ); return rqID; }, }); diff --git a/cvat-core/src/session.ts b/cvat-core/src/session.ts index 1985a72b2683..2e23a68a746c 100644 --- a/cvat-core/src/session.ts +++ b/cvat-core/src/session.ts @@ -173,6 +173,7 @@ function buildDuplicatedAPI(prototype) { async exportDataset( 
format: string, saveImages: boolean, + onlyAnnotated: boolean, useDefaultSettings: boolean, targetStorage: Storage, customName?: string, @@ -182,6 +183,7 @@ function buildDuplicatedAPI(prototype) { prototype.annotations.exportDataset, format, saveImages, + onlyAnnotated, useDefaultSettings, targetStorage, customName, @@ -360,6 +362,7 @@ export class Session { exportDataset: ( format: string, saveImages: boolean, + onlyAnnotated: boolean, useDefaultSettings: boolean, targetStorage: Storage, name?: string, diff --git a/cvat-sdk/cvat_sdk/core/proxies/model_proxy.py b/cvat-sdk/cvat_sdk/core/proxies/model_proxy.py index 40b6ffd27549..1b5d9777cf59 100644 --- a/cvat-sdk/cvat_sdk/core/proxies/model_proxy.py +++ b/cvat-sdk/cvat_sdk/core/proxies/model_proxy.py @@ -230,6 +230,8 @@ def export( status_check_period: Optional[int] = None, location: Optional[Location] = None, cloud_storage_id: Optional[int] = None, + save_images: Optional[bool] = None, + only_annotated: Optional[bool] = None, **query_params, ) -> None: query_params = { @@ -254,6 +256,10 @@ def export( if not local_downloading: query_params["filename"] = str(filename) + if save_images and only_annotated: + query_params["save_images"] = save_images + query_params["only_annotated"] = only_annotated + downloader = Downloader(self._client) export_request = downloader.prepare_file( endpoint, @@ -300,6 +306,7 @@ def export_dataset( pbar: Optional[ProgressReporter] = None, status_check_period: Optional[int] = None, include_images: bool = True, + only_annotated: bool = False, location: Optional[Location] = None, cloud_storage_id: Optional[int] = None, ) -> None: @@ -332,6 +339,7 @@ def export_dataset( cloud_storage_id=cloud_storage_id, format=format_name, save_images=include_images, + only_annotated=only_annotated, ) self._client.logger.info( diff --git a/cvat-ui/src/actions/export-actions.ts b/cvat-ui/src/actions/export-actions.ts index db59a6315c6a..eaed847e1089 100644 --- a/cvat-ui/src/actions/export-actions.ts +++ 
b/cvat-ui/src/actions/export-actions.ts @@ -103,6 +103,7 @@ export const exportDatasetAsync = ( instance: ProjectOrTaskOrJob, format: string, saveImages: boolean, + onlyAnnotated: boolean, useDefaultSettings: boolean, targetStorage: Storage, name?: string, @@ -114,7 +115,7 @@ export const exportDatasetAsync = ( try { const rqID = await instance.annotations - .exportDataset(format, saveImages, useDefaultSettings, targetStorage, name); + .exportDataset(format, saveImages, onlyAnnotated, useDefaultSettings, targetStorage, name); if (shouldListenForProgress(rqID, state.requests)) { await listenExportDatasetAsync(rqID, dispatch, { instance, format, saveImages, diff --git a/cvat-ui/src/components/export-dataset/export-dataset-modal.tsx b/cvat-ui/src/components/export-dataset/export-dataset-modal.tsx index c11df51c72b7..1b3f5e4a4edd 100644 --- a/cvat-ui/src/components/export-dataset/export-dataset-modal.tsx +++ b/cvat-ui/src/components/export-dataset/export-dataset-modal.tsx @@ -16,6 +16,7 @@ import Input from 'antd/lib/input'; import Form from 'antd/lib/form'; import Switch from 'antd/lib/switch'; import Space from 'antd/lib/space'; +import Radio from 'antd/lib/radio'; import TargetStorageField from 'components/storage/target-storage-field'; import CVATMarkdown from 'components/common/cvat-markdown'; import { CombinedState, StorageLocation } from 'reducers'; @@ -27,6 +28,7 @@ import { type FormValues = { selectedFormat: string | undefined; saveImages: boolean; + onlyAnnotated: boolean; customName: string | undefined; targetStorage: StorageData; useProjectTargetStorage: boolean; @@ -35,6 +37,7 @@ type FormValues = { const initialValues: FormValues = { selectedFormat: undefined, saveImages: false, + onlyAnnotated: false, customName: undefined, targetStorage: { location: StorageLocation.LOCAL, @@ -52,6 +55,7 @@ function ExportDatasetModal(props: StateToProps): JSX.Element { const [instanceType, setInstanceType] = useState(''); const [useDefaultTargetStorage, 
setUseDefaultTargetStorage] = useState(true); + const [saveImages, setSaveImages] = useState(false); const [form] = Form.useForm(); const [targetStorage, setTargetStorage] = useState({ location: StorageLocation.LOCAL, @@ -94,6 +98,7 @@ function ExportDatasetModal(props: StateToProps): JSX.Element { const closeModal = (): void => { setUseDefaultTargetStorage(true); setTargetStorage({ location: StorageLocation.LOCAL }); + setSaveImages(false); form.resetFields(); if (instance) { dispatch(exportActions.closeExportDatasetModal(instance)); @@ -108,6 +113,7 @@ function ExportDatasetModal(props: StateToProps): JSX.Element { instance as ProjectOrTaskOrJob, values.selectedFormat as string, values.saveImages, + values.onlyAnnotated, useDefaultTargetStorage, useDefaultTargetStorage ? new Storage({ location: defaultStorageLocation, @@ -180,11 +186,28 @@ function ExportDatasetModal(props: StateToProps): JSX.Element { name='saveImages' valuePropName='checked' > - - + { + setSaveImages(checked); + form.setFieldsValue({ saveImages: checked }); + }} + /> Save images + {saveImages && ( + Image saving option} + > + + All images + Images with annotations only + + + )} + Custom name} name='customName'> 0: + dm_items.append(dm_item) self._items = dm_items @@ -1662,6 +1664,7 @@ def __init__( project_data: ProjectData, *, include_images: bool = False, + only_annotated: bool = False, format_type: str = None, dimension: DimensionType = DimensionType.DIM_2D, **kwargs @@ -1729,8 +1732,10 @@ def __init__( annotations=dm_anno, media=PointCloud(dm_image[0]), related_images=dm_image[1], attributes=attributes, subset=frame_data.subset ) - dm_items.append(dm_item) - + + if not only_annotated or len(dm_anno) > 0: + dm_items.append(dm_item) + self._items = dm_items def categories(self): @@ -1746,12 +1751,14 @@ def __len__(self): def GetCVATDataExtractor( instance_data: Union[ProjectData, CommonData], include_images: bool = False, + only_annotated: bool = False, format_type: str = None, dimension: 
DimensionType = DimensionType.DIM_2D, **kwargs ): kwargs.update({ 'include_images': include_images, + 'only_annotated': only_annotated, 'format_type': format_type, 'dimension': dimension, }) diff --git a/cvat/apps/dataset_manager/formats/camvid.py b/cvat/apps/dataset_manager/formats/camvid.py index 75cea9e98bd4..c839ed6d73b7 100644 --- a/cvat/apps/dataset_manager/formats/camvid.py +++ b/cvat/apps/dataset_manager/formats/camvid.py @@ -16,8 +16,8 @@ @exporter(name='CamVid', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) dataset.transform('polygons_to_masks') diff --git a/cvat/apps/dataset_manager/formats/cityscapes.py b/cvat/apps/dataset_manager/formats/cityscapes.py index ea39578ea3f3..164bcd9f554f 100644 --- a/cvat/apps/dataset_manager/formats/cityscapes.py +++ b/cvat/apps/dataset_manager/formats/cityscapes.py @@ -19,8 +19,8 @@ @exporter(name='Cityscapes', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) dataset.transform('polygons_to_masks') diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index 6d63aeb0360f..744276b849db 100644 --- 
a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -16,8 +16,8 @@ from .registry import dm_env, exporter, importer @exporter(name='COCO', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.export(temp_dir, 'coco_instances', save_images=save_images, merge_images=False) @@ -40,8 +40,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs import_dm_annotations(dataset, instance_data) @exporter(name='COCO Keypoints', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.export(temp_dir, 'coco_person_keypoints', save_images=save_images, merge_images=False) diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 0191dfe1c8c4..faebc7d9988f 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -1370,7 +1370,12 @@ def dump_project_anno(dst_file: BufferedWriter, project_data: ProjectData, callb callback(dumper, project_data) dumper.close_document() -def dump_media_files(instance_data: CommonData, img_dir: str, project_data: ProjectData = None): +def dump_media_files( + instance_data: CommonData, + img_dir: str, + 
project_data: ProjectData = None, + only_annotated: bool = False +): ext = '' if instance_data.meta[instance_data.META_FIELD]['mode'] == 'interpolation': ext = FrameProvider.VIDEO_FRAME_EXT @@ -1380,7 +1385,13 @@ def dump_media_files(instance_data: CommonData, img_dir: str, project_data: Proj instance_data.start, instance_data.stop, frame_provider.Quality.ORIGINAL, frame_provider.Type.BUFFER) - for frame_id, (frame_data, _) in zip(instance_data.rel_range, frames): + + if only_annotated: + annotated_frame_ids = {frame.idx for frame in instance_data.group_by_frame(include_empty=False) if frame.labels} + else: + annotated_frame_ids = set(instance_data.rel_range) + annotated_frame_ids = set(annotated_frame_ids) + for frame_id, (frame_data, _) in zip(annotated_frame_ids, frames): if (project_data is not None and (instance_data.db_instance.id, frame_id) in project_data.deleted_frames) \ or frame_id in instance_data.deleted_frames: continue @@ -1391,17 +1402,17 @@ def dump_media_files(instance_data: CommonData, img_dir: str, project_data: Proj with open(img_path, 'wb') as f: f.write(frame_data.getvalue()) -def _export_task_or_job(dst_file, temp_dir, instance_data, anno_callback, save_images=False): +def _export_task_or_job(dst_file, temp_dir, instance_data, anno_callback, save_images=False, only_annotated=False): with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: dump_task_or_job_anno(f, instance_data, anno_callback) if save_images: - dump_media_files(instance_data, osp.join(temp_dir, 'images')) + dump_media_files(instance_data, osp.join(temp_dir, 'images'), only_annotated=only_annotated) make_zip_archive(temp_dir, dst_file) def _export_project(dst_file: str, temp_dir: str, project_data: ProjectData, - anno_callback: Callable, save_images: bool=False + anno_callback: Callable, save_images: bool=False, only_annotated: bool=False ): with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: dump_project_anno(f, project_data, anno_callback) @@ -1411,27 +1422,27 
@@ def _export_project(dst_file: str, temp_dir: str, project_data: ProjectData, subset = get_defaulted_subset(task_data.db_instance.subset, project_data.subsets) subset_dir = osp.join(temp_dir, 'images', subset) os.makedirs(subset_dir, exist_ok=True) - dump_media_files(task_data, subset_dir, project_data) + dump_media_files(task_data, subset_dir, project_data, only_annotated=only_annotated) make_zip_archive(temp_dir, dst_file) @exporter(name='CVAT for video', ext='ZIP', version='1.1') -def _export_video(dst_file, temp_dir, instance_data, save_images=False): +def _export_video(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): if isinstance(instance_data, ProjectData): _export_project(dst_file, temp_dir, instance_data, - anno_callback=dump_as_cvat_interpolation, save_images=save_images) + anno_callback=dump_as_cvat_interpolation, save_images=save_images, only_annotated=only_annotated) else: _export_task_or_job(dst_file, temp_dir, instance_data, - anno_callback=dump_as_cvat_interpolation, save_images=save_images) + anno_callback=dump_as_cvat_interpolation, save_images=save_images, only_annotated=only_annotated) @exporter(name='CVAT for images', ext='ZIP', version='1.1') -def _export_images(dst_file, temp_dir, instance_data, save_images=False): +def _export_images(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): if isinstance(instance_data, ProjectData): _export_project(dst_file, temp_dir, instance_data, - anno_callback=dump_as_cvat_annotation, save_images=save_images) + anno_callback=dump_as_cvat_annotation, save_images=save_images, only_annotated=only_annotated) else: _export_task_or_job(dst_file, temp_dir, instance_data, - anno_callback=dump_as_cvat_annotation, save_images=save_images) + anno_callback=dump_as_cvat_annotation, save_images=save_images, only_annotated=only_annotated) @importer(name='CVAT', ext='XML, ZIP', version='1.1') def _import(src_file, temp_dir, instance_data, load_data_callback=None, 
**kwargs): diff --git a/cvat/apps/dataset_manager/formats/datumaro.py b/cvat/apps/dataset_manager/formats/datumaro.py index 090397b7a471..bd15168f62cd 100644 --- a/cvat/apps/dataset_manager/formats/datumaro.py +++ b/cvat/apps/dataset_manager/formats/datumaro.py @@ -25,8 +25,8 @@ def transform_item(self, item): @exporter(name="Datumaro", ext="ZIP", version="1.0") -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data=instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data=instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) if not save_images: dataset.transform(DeleteImagePath) @@ -46,9 +46,9 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs import_dm_annotations(dataset, instance_data) @exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) -def _export(dst_file, temp_dir, instance_data, save_images=False): +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): with GetCVATDataExtractor( - instance_data=instance_data, include_images=save_images, + instance_data=instance_data, include_images=save_images, only_annotated=only_annotated, dimension=DimensionType.DIM_3D, ) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) diff --git a/cvat/apps/dataset_manager/formats/icdar.py b/cvat/apps/dataset_manager/formats/icdar.py index 5d031eef82b0..25fdf4d19e36 100644 --- a/cvat/apps/dataset_manager/formats/icdar.py +++ b/cvat/apps/dataset_manager/formats/icdar.py @@ -77,8 +77,8 @@ def transform_item(self, item): return item.wrap(annotations=annotations) @exporter(name='ICDAR Recognition', ext='ZIP', version='1.0') -def _export_recognition(dst_file, temp_dir, instance_data, 
save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export_recognition(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(LabelToCaption) dataset.export(temp_dir, 'icdar_word_recognition', save_images=save_images) @@ -100,8 +100,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs @exporter(name='ICDAR Localization', ext='ZIP', version='1.0') -def _export_localization(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export_localization(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.export(temp_dir, 'icdar_text_localization', save_images=save_images) @@ -122,8 +122,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs @exporter(name='ICDAR Segmentation', ext='ZIP', version='1.0') -def _export_segmentation(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export_segmentation(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) dataset.transform('polygons_to_masks') diff --git a/cvat/apps/dataset_manager/formats/imagenet.py b/cvat/apps/dataset_manager/formats/imagenet.py index fd5e9a99a176..bc41f65d94a4 
100644 --- a/cvat/apps/dataset_manager/formats/imagenet.py +++ b/cvat/apps/dataset_manager/formats/imagenet.py @@ -17,8 +17,8 @@ @exporter(name='ImageNet', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) if save_images: dataset.export(temp_dir, 'imagenet', save_images=save_images) diff --git a/cvat/apps/dataset_manager/formats/kitti.py b/cvat/apps/dataset_manager/formats/kitti.py index 01e1cd3fc4bc..d99699671b6a 100644 --- a/cvat/apps/dataset_manager/formats/kitti.py +++ b/cvat/apps/dataset_manager/formats/kitti.py @@ -19,8 +19,8 @@ @exporter(name='KITTI', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py index be9679f268e8..2b01468a92fb 100644 --- a/cvat/apps/dataset_manager/formats/labelme.py +++ b/cvat/apps/dataset_manager/formats/labelme.py @@ -15,8 +15,8 @@ @exporter(name='LabelMe', ext='ZIP', version='3.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, 
only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.export(temp_dir, 'label_me', save_images=save_images) diff --git a/cvat/apps/dataset_manager/formats/lfw.py b/cvat/apps/dataset_manager/formats/lfw.py index 0af356332bb5..1e70f6cd45c5 100644 --- a/cvat/apps/dataset_manager/formats/lfw.py +++ b/cvat/apps/dataset_manager/formats/lfw.py @@ -24,8 +24,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs import_dm_annotations(dataset, instance_data) @exporter(name='LFW', ext='ZIP', version='1.0') -def _exporter(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _exporter(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.export(temp_dir, format='lfw', save_images=save_images) diff --git a/cvat/apps/dataset_manager/formats/market1501.py b/cvat/apps/dataset_manager/formats/market1501.py index 6be8b2fcf75f..eed68be46c49 100644 --- a/cvat/apps/dataset_manager/formats/market1501.py +++ b/cvat/apps/dataset_manager/formats/market1501.py @@ -61,8 +61,8 @@ def transform_item(self, item): @exporter(name='Market-1501', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(LabelAttrToAttr, 
label='market-1501') diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index f003f68383e7..838935ac7655 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ b/cvat/apps/dataset_manager/formats/mask.py @@ -15,8 +15,8 @@ from .utils import make_colormap @exporter(name='Segmentation mask', ext='ZIP', version='1.1') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) dataset.transform('polygons_to_masks') diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index 4030d865c742..be5ab6a6a290 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -93,8 +93,8 @@ def _import_to_task(dataset, instance_data): @exporter(name='MOT', ext='ZIP', version='1.1') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = dm.Dataset.from_extractors(extractor, env=dm_env) dataset.export(temp_dir, 'mot_seq_gt', save_images=save_images) diff --git a/cvat/apps/dataset_manager/formats/mots.py b/cvat/apps/dataset_manager/formats/mots.py index 9ed156e6cd4e..08bfa0032beb 100644 --- a/cvat/apps/dataset_manager/formats/mots.py +++ b/cvat/apps/dataset_manager/formats/mots.py @@ -93,8 +93,8 @@ def _import_to_task(dataset, instance_data): 
instance_data.add_track(track) @exporter(name='MOTS PNG', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(KeepTracks) # can only export tracks dataset.transform(RotatedBoxesToPolygons) diff --git a/cvat/apps/dataset_manager/formats/openimages.py b/cvat/apps/dataset_manager/formats/openimages.py index 51fcee29a2fb..b17ccdb32762 100644 --- a/cvat/apps/dataset_manager/formats/openimages.py +++ b/cvat/apps/dataset_manager/formats/openimages.py @@ -38,8 +38,8 @@ def find_item_ids(path): yield row.split(',')[0] @exporter(name='Open Images V6', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, task_data, save_images=False): - with GetCVATDataExtractor(task_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, task_data, save_images=False, only_annotated=False): + with GetCVATDataExtractor(task_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) dataset.transform('polygons_to_masks') diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py index a0d84b745d73..85d37595e681 100644 --- a/cvat/apps/dataset_manager/formats/pascal_voc.py +++ b/cvat/apps/dataset_manager/formats/pascal_voc.py @@ -19,8 +19,8 @@ @exporter(name='PASCAL VOC', ext='ZIP', version='1.1') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, 
save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.export(temp_dir, 'voc', save_images=save_images, diff --git a/cvat/apps/dataset_manager/formats/pointcloud.py b/cvat/apps/dataset_manager/formats/pointcloud.py index 6ddfbb495427..c484991bdfd3 100644 --- a/cvat/apps/dataset_manager/formats/pointcloud.py +++ b/cvat/apps/dataset_manager/formats/pointcloud.py @@ -16,9 +16,9 @@ @exporter(name='Sly Point Cloud Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) -def _export_images(dst_file, temp_dir, task_data, save_images=False): +def _export_images(dst_file, temp_dir, task_data, save_images=False, only_annotated=False): with GetCVATDataExtractor( - task_data, include_images=save_images, format_type='sly_pointcloud', + task_data, include_images=save_images, only_annotated=only_annotated, format_type='sly_pointcloud', dimension=DimensionType.DIM_3D, ) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) diff --git a/cvat/apps/dataset_manager/formats/velodynepoint.py b/cvat/apps/dataset_manager/formats/velodynepoint.py index 9912d0b1d67b..3d5b4374f9f2 100644 --- a/cvat/apps/dataset_manager/formats/velodynepoint.py +++ b/cvat/apps/dataset_manager/formats/velodynepoint.py @@ -26,9 +26,9 @@ def transform_item(self, item): return item.wrap(annotations=annotations) @exporter(name='Kitti Raw Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) -def _export_images(dst_file, temp_dir, task_data, save_images=False): +def _export_images(dst_file, temp_dir, task_data, save_images=False, only_annotated=False): with GetCVATDataExtractor( - task_data, include_images=save_images, format_type="kitti_raw", + task_data, include_images=save_images, only_annotated=only_annotated, format_type="kitti_raw", dimension=DimensionType.DIM_3D, ) as extractor: dataset = 
Dataset.from_extractors(extractor, env=dm_env) diff --git a/cvat/apps/dataset_manager/formats/vggface2.py b/cvat/apps/dataset_manager/formats/vggface2.py index 642171f0f8d9..42cc1b855387 100644 --- a/cvat/apps/dataset_manager/formats/vggface2.py +++ b/cvat/apps/dataset_manager/formats/vggface2.py @@ -15,10 +15,11 @@ @exporter(name='VGGFace2', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + # Frames are filtered by the extractor, as in the other format exporters; the converter only gets save_images. + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.export(temp_dir, 'vgg_face2', save_images=save_images) make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/widerface.py b/cvat/apps/dataset_manager/formats/widerface.py index 12a9bf0d21e5..331bcd6277c0 100644 --- a/cvat/apps/dataset_manager/formats/widerface.py +++ b/cvat/apps/dataset_manager/formats/widerface.py @@ -15,10 +15,11 @@ @exporter(name='WiderFace', ext='ZIP', version='1.0') -def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export(dst_file, temp_dir, instance_data, save_images=False, only_annotated=False): + # Frames are filtered by the extractor, as in the other format exporters; the converter only gets save_images. + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.export(temp_dir, 'wider_face', save_images=save_images) make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py index a8c8177b055b..3432b0ff07a9 100644 --- a/cvat/apps/dataset_manager/formats/yolo.py +++ b/cvat/apps/dataset_manager/formats/yolo.py @@ -22,8 +22,8 @@ from .registry import dm_env, exporter, importer 
-def _export_common(dst_file, temp_dir, instance_data, format_name, *, save_images=False): - with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: +def _export_common(dst_file, temp_dir, instance_data, format_name, *, save_images=False, only_annotated=False): + with GetCVATDataExtractor(instance_data, include_images=save_images, only_annotated=only_annotated) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.export(temp_dir, format_name, save_images=save_images) diff --git a/cvat/apps/dataset_manager/project.py b/cvat/apps/dataset_manager/project.py index 977b1fbad3ba..5c8e5ed042a9 100644 --- a/cvat/apps/dataset_manager/project.py +++ b/cvat/apps/dataset_manager/project.py @@ -26,7 +26,7 @@ dlogger = DatasetLogManager() def export_project(project_id, dst_file, format_name, - server_url=None, save_images=False): + server_url=None, save_images=False, only_annotated=False): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. 
# But there is the bug with corrupted dump file in case 2 or @@ -38,7 +38,7 @@ def export_project(project_id, dst_file, format_name, exporter = make_exporter(format_name) with open(dst_file, 'wb') as f: - project.export(f, exporter, host=server_url, save_images=save_images) + project.export(f, exporter, host=server_url, save_images=save_images, only_annotated=only_annotated) class ProjectAnnotationAndData: def __init__(self, pk: int): diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index d7d16f8cba33..aa3f570e68d1 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -915,7 +915,7 @@ def delete_job_data(pk): annotation = JobAnnotation(pk) annotation.delete() -def export_job(job_id, dst_file, format_name, server_url=None, save_images=False): +def export_job(job_id, dst_file, format_name, server_url=None, save_images=False, only_annotated=False): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. # But there is the bug with corrupted dump file in case 2 or @@ -927,7 +927,7 @@ def export_job(job_id, dst_file, format_name, server_url=None, save_images=False exporter = make_exporter(format_name) with open(dst_file, 'wb') as f: - job.export(f, exporter, host=server_url, save_images=save_images) + job.export(f, exporter, host=server_url, save_images=save_images, only_annotated=only_annotated) @silk_profile(name="GET task data") @transaction.atomic @@ -964,7 +964,7 @@ def delete_task_data(pk): annotation = TaskAnnotation(pk) annotation.delete() -def export_task(task_id, dst_file, format_name, server_url=None, save_images=False): +def export_task(task_id, dst_file, format_name, server_url=None, save_images=False, only_annotated=False): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. 
# But there is the bug with corrupted dump file in case 2 or @@ -976,7 +976,7 @@ def export_task(task_id, dst_file, format_name, server_url=None, save_images=Fal exporter = make_exporter(format_name) with open(dst_file, 'wb') as f: - task.export(f, exporter, host=server_url, save_images=save_images) + task.export(f, exporter, host=server_url, save_images=save_images, only_annotated=only_annotated) @transaction.atomic def import_task_annotations(src_file, task_id, format_name, conv_mask_to_poly): diff --git a/cvat/apps/dataset_manager/util.py b/cvat/apps/dataset_manager/util.py index b5ed83ed58b1..f0c4c1bb0445 100644 --- a/cvat/apps/dataset_manager/util.py +++ b/cvat/apps/dataset_manager/util.py @@ -160,14 +160,19 @@ def get_export_cache_dir(db_instance: Project | Task | Job) -> str: def make_export_filename( dst_dir: str, save_images: bool, + only_annotated: bool, instance_timestamp: float, format_name: str, ) -> str: from .formats.registry import EXPORT_FORMATS file_ext = EXPORT_FORMATS[format_name].EXT + dataset_type = 'dataset' if save_images else 'annotations' + if only_annotated: + dataset_type += "-annotated_only" + filename = '%s-instance%f-%s.%s' % ( - 'dataset' if save_images else 'annotations', + dataset_type, # store the instance timestamp in the file name to reliably get this information # ctime / mtime do not return file creation time on linux # mtime is used for file usage checks diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index 35e40c8c03a3..d62df6dd9de6 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -97,7 +97,7 @@ def _patched_retry(*_1, **_2): setattr(current_rq_job, 'retry', _patched_retry) return current_rq_job -def export(dst_format, project_id=None, task_id=None, job_id=None, server_url=None, save_images=False): +def export(dst_format, project_id=None, task_id=None, job_id=None, server_url=None, save_images=False, only_annotated=False): try: if task_id is not 
None: logger = slogger.task[task_id] @@ -129,7 +129,7 @@ def export(dst_format, project_id=None, task_id=None, job_id=None, server_url=No instance_update_time = max(tasks_update + [instance_update_time]) output_path = make_export_filename( - cache_dir, save_images, instance_update_time.timestamp(), dst_format + cache_dir, save_images, only_annotated, instance_update_time.timestamp(), dst_format ) os.makedirs(cache_dir, exist_ok=True) @@ -144,7 +144,7 @@ def export(dst_format, project_id=None, task_id=None, job_id=None, server_url=No with tempfile.TemporaryDirectory(dir=cache_dir) as temp_dir: temp_file = osp.join(temp_dir, 'result') export_fn(db_instance.id, temp_file, dst_format, - server_url=server_url, save_images=save_images) + server_url=server_url, save_images=save_images, only_annotated=only_annotated) os.replace(temp_file, output_path) scheduler: Scheduler = django_rq.get_scheduler( @@ -185,22 +185,32 @@ def export(dst_format, project_id=None, task_id=None, job_id=None, server_url=No raise def export_job_annotations(job_id, dst_format=None, server_url=None): - return export(dst_format,job_id=job_id, server_url=server_url, save_images=False) + return export(dst_format,job_id=job_id, server_url=server_url, save_images=False, only_annotated=False) + +def export_job_as_dataset_only_annotated(job_id, dst_format=None, server_url=None): + return export(dst_format, job_id=job_id, server_url=server_url, save_images=True, only_annotated=True) def export_job_as_dataset(job_id, dst_format=None, server_url=None): - return export(dst_format, job_id=job_id, server_url=server_url, save_images=True) + return export(dst_format, job_id=job_id, server_url=server_url, save_images=True, only_annotated=False) + +def export_task_annotations(task_id, dst_format=None, server_url=None): + return export(dst_format,task_id=task_id, server_url=server_url, save_images=False, only_annotated=False) + +def export_task_as_dataset_only_annotated(task_id, dst_format=None, server_url=None): + 
return export(dst_format, task_id=task_id, server_url=server_url, save_images=True, only_annotated=True) def export_task_as_dataset(task_id, dst_format=None, server_url=None): - return export(dst_format, task_id=task_id, server_url=server_url, save_images=True) + return export(dst_format, task_id=task_id, server_url=server_url, save_images=True, only_annotated=False) -def export_task_annotations(task_id, dst_format=None, server_url=None): - return export(dst_format,task_id=task_id, server_url=server_url, save_images=False) +def export_project_annotations(project_id, dst_format=None, server_url=None): + return export(dst_format, project_id=project_id, server_url=server_url, save_images=False, only_annotated=False) + +def export_project_as_dataset_only_annotated(project_id, dst_format=None, server_url=None): + return export(dst_format, project_id=project_id, server_url=server_url, save_images=True, only_annotated=True) def export_project_as_dataset(project_id, dst_format=None, server_url=None): - return export(dst_format, project_id=project_id, server_url=server_url, save_images=True) + return export(dst_format, project_id=project_id, server_url=server_url, save_images=True, only_annotated=False) -def export_project_annotations(project_id, dst_format=None, server_url=None): - return export(dst_format, project_id=project_id, server_url=server_url, save_images=False) class FileIsBeingUsedError(Exception): diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 441d4702014d..a9454ed8bc2c 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -159,6 +159,7 @@ class ExportArgs: format: str filename: str save_images: bool + only_annotated: bool location_config: Dict[str, Any] @property @@ -171,6 +172,7 @@ def __init__( request: Request, export_callback: Callable, save_images: Optional[bool] = None, + only_annotated: Optional[bool] = None, *, version: int = 2, ) -> None: @@ -186,6 +188,11 @@ def __init__( if 
save_images is not None else to_bool(request.query_params.get("save_images", False)) ) + only_annotated = ( + only_annotated + if only_annotated is not None + else to_bool(request.query_params.get("only_annotated", False)) + ) try: location_config = get_location_configuration( @@ -207,6 +214,7 @@ def __init__( format=format_name, filename=filename, save_images=save_images, + only_annotated=only_annotated, location_config=location_config, ) @@ -237,6 +245,7 @@ def handle_local_download() -> Response: timestamp=instance_timestamp, format_name=self.export_args.format, is_annotation_file=not self.export_args.save_images, + only_annotated = self.export_args.only_annotated, extension=osp.splitext(file_path)[1], ) @@ -398,6 +407,7 @@ def export(self) -> Response: format_name=self.export_args.format, cloud_storage_id=self.export_args.location_config.get("storage_id"), save_images=self.export_args.save_images, + only_annotated=self.export_args.only_annotated, ) serializer = RqIdSerializer(data={"rq_id": rq_id}) @@ -451,6 +461,7 @@ def setup_background_job( timestamp=instance_timestamp, format_name=self.export_args.format, is_annotation_file=not self.export_args.save_images, + only_annotated = self.export_args.only_annotated, ) func = export_resource_to_cloud_storage func_args = ( diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 3e48bf85327e..78e21c04e871 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -427,9 +427,11 @@ def export_dataset_v1( get_data: Optional[Callable[[int], Dict[str, Any]]] = None, ) -> Response: if request.query_params.get("format"): - callback = self.get_export_callback(save_images) - dataset_export_manager = DatasetExportManager(self._object, request, callback, save_images=save_images, version=1) + only_annotated = to_bool(request.query_params.get('only_annotated', False)) + callback = self.get_export_callback(save_images, only_annotated) + + dataset_export_manager = 
DatasetExportManager(self._object, request, callback, save_images=save_images, only_annotated=only_annotated, version=1) return dataset_export_manager.export() if not get_data: @@ -458,6 +460,8 @@ def export_dataset_v1( location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), OpenApiParameter('save_images', description='Include images or not', location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, default=False), + OpenApiParameter('only_annotated', description='Include all images or only annotated', + location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, default=False), ], request=OpenApiTypes.NONE, responses={ @@ -471,9 +475,10 @@ def export_dataset_v2(self, request: HttpRequest, pk: int): self._object = self.get_object() # force call of check_object_permissions() save_images = is_dataset_export(request) - callback = self.get_export_callback(save_images) + only_annotated = to_bool(request.query_params.get('only_annotated', False)) + callback = self.get_export_callback(save_images, only_annotated) - dataset_export_manager = DatasetExportManager(self._object, request, callback, save_images=save_images, version=2) + dataset_export_manager = DatasetExportManager(self._object, request, callback, save_images=save_images, only_annotated=only_annotated, version=2) return dataset_export_manager.export() # FUTURE-TODO: migrate to new API diff --git a/cvat/apps/engine/utils.py b/cvat/apps/engine/utils.py index 01748778339c..6cda844dc243 100644 --- a/cvat/apps/engine/utils.py +++ b/cvat/apps/engine/utils.py @@ -390,11 +390,17 @@ def build_annotations_file_name( timestamp: str, format_name: str, is_annotation_file: bool = True, + only_annotated: bool = False, extension: str = "{}", ) -> str: - # "____.zip" + # "____.zip" + + dataset_type = 'annotations' if is_annotation_file else ('dataset') + if(only_annotated): + dataset_type += "-annotated_only" + return "{}_{}_{}_{}_{}{}".format( - class_name, identifier, 'annotations' 
if is_annotation_file else 'dataset', + class_name, identifier, dataset_type, timestamp, format_name, extension, ).lower() diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 05a50857b28f..6643e6c4dd8f 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -311,8 +311,13 @@ def perform_create(self, serializer, **kwargs): # Required for the extra summary information added in the queryset serializer.instance = self.get_queryset().get(pk=serializer.instance.pk) - def get_export_callback(self, save_images: bool) -> Callable: - return dm.views.export_project_as_dataset if save_images else dm.views.export_project_annotations + def get_export_callback(self, save_images: bool, only_annotated: bool) -> Callable: + if save_images and not only_annotated: + return dm.views.export_project_as_dataset + elif save_images: + return dm.views.export_project_as_dataset_only_annotated + else: + return dm.views.export_project_annotations @extend_schema(methods=['GET'], summary='Export a project as a dataset / Check dataset import status', description=textwrap.dedent(""" @@ -1317,8 +1322,13 @@ def append_data_chunk(self, request, pk, file_id): self._object = self.get_object() return self.append_tus_chunk(request, file_id) - def get_export_callback(self, save_images: bool) -> Callable: - return dm.views.export_task_as_dataset if save_images else dm.views.export_task_annotations + def get_export_callback(self, save_images: bool, only_annotated: bool) -> Callable: + if save_images and not only_annotated: + return dm.views.export_task_as_dataset + elif save_images: + return dm.views.export_task_as_dataset_only_annotated + else: + return dm.views.export_task_annotations # TODO: mark this endpoint as deprecated when new endpoint for downloading results will be implemented @extend_schema(methods=['GET'], summary='Get task annotations or export them as a dataset in a specific format', @@ -2015,8 +2025,13 @@ def dataset_export(self, request, pk): return 
self.export_dataset_v1(request=request, save_images=True) - def get_export_callback(self, save_images: bool) -> Callable: - return dm.views.export_job_as_dataset if save_images else dm.views.export_job_annotations + def get_export_callback(self, save_images: bool, only_annotated: bool) -> Callable: + if save_images and not only_annotated: + return dm.views.export_job_as_dataset + elif save_images: + return dm.views.export_job_as_dataset_only_annotated + else: + return dm.views.export_job_annotations @extend_schema(summary='Get data of a job', parameters=[ diff --git a/cvat/apps/events/handlers.py b/cvat/apps/events/handlers.py index f2d3f7577617..7c7fa39c27da 100644 --- a/cvat/apps/events/handlers.py +++ b/cvat/apps/events/handlers.py @@ -498,9 +498,10 @@ def handle_dataset_export( format_name: str, cloud_storage_id: Optional[int], save_images: bool, + only_annotated: bool, ) -> None: handle_dataset_io(instance, "export", - format_name=format_name, cloud_storage_id=cloud_storage_id, save_images=save_images) + format_name=format_name, cloud_storage_id=cloud_storage_id, save_images=save_images, only_annotated=only_annotated) def handle_dataset_import( instance: Union[Project, Task, Job], diff --git a/cvat/schema.yml b/cvat/schema.yml index badefe355b8d..9e361292392b 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -2546,6 +2546,12 @@ paths: type: boolean default: false description: Include images or not + - in: query + name: only_annotated + schema: + type: boolean + default: false + description: Whether to include all images in the export or only the ones with annotations in the dataset tags: - jobs security: @@ -3965,6 +3971,12 @@ paths: type: boolean default: false description: Include images or not + - in: query + name: only_annotated + schema: + type: boolean + default: false + description: Whether to include all images in the export or only the ones with annotations in the dataset tags: - projects security: @@ -5909,6 +5921,12 @@ paths: type: boolean 
default: false description: Include images or not + - in: query + name: only_annotated + schema: + type: boolean + default: false + description: Whether to include all images in the export or only the ones with annotations in the dataset tags: - tasks security: diff --git a/tests/python/sdk/common.py b/tests/python/sdk/common.py index 3d98bf16925d..a45163759592 100644 --- a/tests/python/sdk/common.py +++ b/tests/python/sdk/common.py @@ -59,12 +59,14 @@ def _test_can_export_dataset( format_name: str, file_path: Path, include_images: bool, + only_annotated: bool = False, location: Optional[Location], request: pytest.FixtureRequest, cloud_storages: CloudStorageAssets, ): kwargs = { "include_images": include_images, + "only_annotated": only_annotated, "location": location, } @@ -99,3 +101,40 @@ def _test_can_export_dataset( ), **kwargs, ) + + def _test_can_export_dataset_with_only_annotated_images( + self, + task: Task, + format_name: str, + only_annotated: bool, + location: Optional[Location], + request: pytest.FixtureRequest, + job: Optional[Job] = None, + ): + """Export `job` if given, otherwise `task`, with images, then compare the number of `.png` archive entries with the expected frame count.""" + # Local import: zipfile is only needed by this helper. + import zipfile + + entity = job if job is not None else task + file_path = self.tmp_path / f"{'job' if isinstance(entity, Job) else 'task'}_{entity.id}-{format_name.lower()}-only_annotated_{only_annotated}.zip" + + self._test_can_export_dataset( + entity, + format_name=format_name, + file_path=file_path, + include_images=True, + only_annotated=only_annotated, + location=location, + request=request, + cloud_storages=None, + ) + + with zipfile.ZipFile(file_path, 'r') as zip_file: + image_files = [file for file in zip_file.namelist() if file.endswith('.png')] + + if only_annotated: + # Several shapes may sit on the same frame, so count distinct frames rather than shapes. + annotated_image_count = len({shape.frame for shape in task.get_annotations().shapes}) + assert len(image_files) == annotated_image_count, "Exported images count does not match annotated images count when only_annotated is True." + else: + assert len(image_files) == task.size, "Exported images count does not match the task size when only_annotated is False." 
diff --git a/tests/python/sdk/test_jobs.py b/tests/python/sdk/test_jobs.py index ef46fcb8cf0e..51b84de6dd8d 100644 --- a/tests/python/sdk/test_jobs.py +++ b/tests/python/sdk/test_jobs.py @@ -6,6 +6,7 @@ from logging import Logger from pathlib import Path from typing import Optional, Tuple +import zipfile import pytest from cvat_sdk import Client @@ -148,6 +149,34 @@ def test_can_export_dataset( request=request, cloud_storages=cloud_storages, ) + + @pytest.mark.parametrize("format_name", ("CVAT for images 1.1",)) + @pytest.mark.parametrize("only_annotated", (True, False)) + @parametrize( + "task, location", + [ + (fixture_ref("fxt_new_task"), None), + ], + ) + def test_can_export_dataset_with_only_annotated_images_for_job( + self, + format_name: str, + only_annotated: bool, + task: Task, + location: Optional[Location], + request: pytest.FixtureRequest, + ): + job_id = task.get_jobs()[0].id + job = self.client.jobs.retrieve(job_id) + + self._test_can_export_dataset_with_only_annotated_images( + task= task, + format_name=format_name, + only_annotated=only_annotated, + location=location, + request=request, + job=job, + ) def test_can_download_preview(self, fxt_new_task: Task): frame_encoded = fxt_new_task.get_jobs()[0].get_preview() diff --git a/tests/python/sdk/test_tasks.py b/tests/python/sdk/test_tasks.py index 0dc5c0694e9c..4bd340ff4b9f 100644 --- a/tests/python/sdk/test_tasks.py +++ b/tests/python/sdk/test_tasks.py @@ -316,6 +316,30 @@ def test_can_export_dataset( cloud_storages=cloud_storages, ) + @pytest.mark.parametrize("format_name", ("CVAT for images 1.1",)) + @pytest.mark.parametrize("only_annotated", (True, False)) + @parametrize( + "task, location", + [ + (fixture_ref("fxt_new_task"), None), + ], + ) + def test_can_export_dataset_with_only_annotated_images_for_task( + self, + format_name: str, + only_annotated: bool, + task: Task, + location: Optional[Location], + request: pytest.FixtureRequest, + ): + 
self._test_can_export_dataset_with_only_annotated_images( + task=task, + format_name=format_name, + only_annotated=only_annotated, + location=location, + request=request, + ) + def test_can_download_dataset_twice_in_a_row(self, fxt_new_task: Task): pbar_out = io.StringIO() pbar = make_pbar(file=pbar_out)