Skip to content

Commit

Permalink
chore: merge release/2.158.3 into main (#1755)
Browse files Browse the repository at this point in the history
Co-authored-by: FannyGaudin <150816089+FannyGaudin@users.noreply.github.com>
Co-authored-by: Clément Bussière <clement.bussiere@kili-technology.com>
  • Loading branch information
3 people authored Jul 30, 2024
1 parent 691a625 commit 707412c
Show file tree
Hide file tree
Showing 10 changed files with 145 additions and 19 deletions.
6 changes: 5 additions & 1 deletion .github/scripts/upload_test_stats_datadog.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from datadog import api, initialize
from tqdm import tqdm

from kili.core.helpers import get_response_json, log_raise_for_status

# https://docs.datadoghq.com/developers/guide/what-best-practices-are-recommended-for-naming-metrics-and-tags/#rules-and-best-practices-for-naming-metrics
# map the test name to the metrics name on datadog
TESTS_TO_PLOT_ON_DATADOG_MAP = {
Expand Down Expand Up @@ -77,7 +79,9 @@ def get_workflow_runs_from_github() -> List[Dict]:
while True:
print("Fetching page", page, "...")
response = requests.get(url + f"&page={page}", headers=HEADERS, timeout=30)
response_json = response.json()
log_raise_for_status(response)

response_json = get_response_json(response)
for workflow_run in response_json["workflow_runs"]:
updated_at = datetime.strptime(workflow_run["updated_at"], r"%Y-%m-%dT%H:%M:%SZ")

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

[project]
name = "kili"
version = "2.158.2"
version = "2.158.3"
description = "Python client for Kili Technology labeling tool"
readme = "README.md"
authors = [{ name = "Kili Technology", email = "contact@kili-technology.com" }]
Expand Down
2 changes: 1 addition & 1 deletion src/kili/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Kili Python SDK."""

__version__ = "2.158.2"
__version__ = "2.158.3"
9 changes: 4 additions & 5 deletions src/kili/adapters/kili_api_gateway/asset/formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Dict

from kili.adapters.http_client import HttpClient
from kili.core.helpers import is_url
from kili.core.helpers import get_response_json, is_url, log_raise_for_status
from kili.domain.types import ListOrTuple


Expand All @@ -13,10 +13,9 @@ def load_json_from_link(link: str, http_client: HttpClient) -> Dict:
if link == "" or not is_url(link):
return {}

try:
return http_client.get(link, timeout=30).json()
except json.JSONDecodeError:
return {}
response = http_client.get(link, timeout=30)
log_raise_for_status(response)
return get_response_json(response)


def load_asset_json_fields(asset: Dict, fields: ListOrTuple[str], http_client: HttpClient) -> Dict:
Expand Down
29 changes: 29 additions & 0 deletions src/kili/core/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import functools
import glob
import json
import mimetypes
import os
import re
Expand All @@ -10,11 +11,13 @@
from typing import Any, Callable, Dict, List, Optional, Type, TypeVar, Union

import pyparsing as pp
import requests
import tenacity
from typing_extensions import get_args, get_origin

from kili.adapters.http_client import HttpClient
from kili.core.constants import mime_extensions_for_IV2
from kili.log.logging import logger

T = TypeVar("T")

Expand Down Expand Up @@ -342,3 +345,29 @@ def is_empty_list_with_warning(method_name: str, argument_name: str, argument_va
)
return True
return False


def log_raise_for_status(response: requests.Response) -> None:
    """Check the status of a requests.Response, logging the failure before re-raising it.

    Args:
        response: a requests.Response

    Raises:
        requests.exceptions.HTTPError: re-raised unchanged after being logged.
    """
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_error:
        # Log first so the failure is visible even if a caller swallows the exception.
        logger.exception("An error occurred while processing the response: %s", http_error)
        raise


def get_response_json(response: requests.Response) -> dict:
    """Decode the json body of a requests.Response.

    Args:
        response: a requests.Response

    Returns:
        The decoded json content, or an empty dict when the body cannot be decoded.
    """
    try:
        return response.json()
    except ValueError:
        # Depending on the requests version and on whether simplejson is installed,
        # the decoding error does not always derive from the standard library
        # json.JSONDecodeError; every variant is a ValueError.
        logger.exception("An error occurred while decoding the json response")
        return {}
7 changes: 7 additions & 0 deletions src/kili/log/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import logging
import sys

# Logger shared by the "kili" package.
logger = logging.getLogger("kili")
# Records are written to stderr through a dedicated handler...
logger.addHandler(logging.StreamHandler(stream=sys.stderr))
# ...and are not passed on to the handlers of ancestor loggers.
logger.propagate = False
# Only warnings and more severe records are emitted.
logger.setLevel(logging.WARNING)
75 changes: 73 additions & 2 deletions src/kili/services/asset_import/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
from itertools import repeat
from json import JSONDecodeError, loads
from typing import List

from kili.core.helpers import get_mime_type, is_url
Expand Down Expand Up @@ -162,20 +163,90 @@ def get_data_type(self, assets):
return VideoDataType.HOSTED_FILE
return VideoDataType.LOCAL_FILE

@staticmethod
def are_native_videos(assets) -> bool:
    """Determine if all the assets are to be kept as native videos.

    For each asset, the ``shouldUseNativeVideo`` flag is read from
    ``json_metadata["processingParameters"]``; it defaults to True when the
    flag, or any level above it, is missing or None.

    Args:
        assets: the assets to import, each one supporting ``.get("json_metadata")``.

    Returns:
        True when every flag is truthy (also when ``assets`` is empty),
        False when every flag is falsy.

    Raises:
        ImportValidationError: when the assets mix truthy and falsy flags.
    """
    native_video_flags = []
    for asset in assets:
        # json_metadata stringification is done later on the call
        # `or {}` also covers keys that are present but explicitly set to None.
        json_metadata_ = asset.get("json_metadata") or {}
        processing_parameters = json_metadata_.get("processingParameters") or {}
        native_video_flags.append(processing_parameters.get("shouldUseNativeVideo", True))
    if all(native_video_flags):
        return True
    if not any(native_video_flags):
        return False
    raise ImportValidationError(
        """
Cannot upload videos to split into frames
and video to keep as native in the same time.
Please separate the assets into 2 calls
"""
    )

@staticmethod
def has_complete_processing_parameters(asset) -> bool:
    """Determine if an asset provides every required video processing parameter.

    The ``jsonMetadata`` entry of the asset is decoded when it is a json string
    (an already decoded dict is used as is) and its ``processingParameters``
    object must hold all the required keys with values of the expected type.

    Args:
        asset: the asset to inspect, supporting ``.get("jsonMetadata")``.

    Returns:
        True when all the required parameters are present and correctly typed,
        False otherwise, including when the metadata is missing, is not valid
        json, or is not a json object.
    """
    json_metadata = asset.get("jsonMetadata")
    if not json_metadata:
        return False

    if isinstance(json_metadata, (str, bytes, bytearray)):
        try:
            json_metadata = loads(json_metadata)
        except JSONDecodeError:
            return False

    # Anything that is not a json object cannot hold the processing parameters.
    if not isinstance(json_metadata, dict):
        return False

    processing_parameters = json_metadata.get("processingParameters")
    if not processing_parameters or not isinstance(processing_parameters, dict):
        return False

    required_parameters = (
        ("codec", str),
        ("delayDueToMinPts", int),
        ("framesPlayedPerSecond", float),
        ("numberOfFrames", int),
        ("startTime", float),
    )
    # A missing key yields None, which matches none of the required types.
    return all(
        isinstance(processing_parameters.get(key), required_type)
        for key, required_type in required_parameters
    )

def videos_have_complete_processing_parameters(self, assets) -> bool:
    """Tell whether every asset passes `has_complete_processing_parameters`.

    Returns True for an empty collection; evaluation stops at the first asset
    that fails the check.
    """
    return all(self.has_complete_processing_parameters(asset) for asset in assets)

def import_assets(self, assets: List[AssetLike]):
"""Import video assets into Kili."""
self._check_upload_is_allowed(assets)
data_type = self.get_data_type(assets)
assets = self.filter_duplicate_external_ids(assets)
if data_type == VideoDataType.LOCAL_FILE:
assets = self.filter_local_assets(assets, self.raise_error)
batch_params = BatchParams(is_hosted=False, is_asynchronous=True)
are_native_videos = self.are_native_videos(assets)
videos_have_complete_processing_parameters = (
self.videos_have_complete_processing_parameters(assets)
)
is_synchronous = are_native_videos and videos_have_complete_processing_parameters
batch_params = BatchParams(is_hosted=False, is_asynchronous=not is_synchronous)
batch_importer = VideoContentBatchImporter(
self.kili, self.project_params, batch_params, self.pbar
)
batch_size = IMPORT_BATCH_SIZE
elif data_type == VideoDataType.HOSTED_FILE:
batch_params = BatchParams(is_hosted=True, is_asynchronous=True)
are_native_videos = self.are_native_videos(assets)
videos_have_complete_processing_parameters = (
self.videos_have_complete_processing_parameters(assets)
)
is_synchronous = are_native_videos and videos_have_complete_processing_parameters
batch_params = BatchParams(is_hosted=True, is_asynchronous=not is_synchronous)
batch_importer = VideoContentBatchImporter(
self.kili, self.project_params, batch_params, self.pbar
)
Expand Down
6 changes: 5 additions & 1 deletion src/kili/services/export/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Any, Dict, Iterator, List

from kili.adapters.http_client import HttpClient
from kili.core.helpers import get_response_json, log_raise_for_status

from .exceptions import DownloadError

Expand Down Expand Up @@ -48,8 +49,11 @@ def get_frames(self, content_url: str) -> List[str]:
frames: List[str] = []
json_content_resp = self.http_client.get(content_url, timeout=30)

log_raise_for_status(json_content_resp)
json_response = get_response_json(json_content_resp)

if json_content_resp.ok:
frames = list(json_content_resp.json().values())
frames = list(json_response.values())
return frames

def get_content_stream(self, content_url: str, block_size: int) -> Iterator[Any]:
Expand Down
9 changes: 7 additions & 2 deletions src/kili/services/export/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
from kili.adapters.http_client import HttpClient
from kili.adapters.kili_api_gateway.helpers.queries import QueryOptions
from kili.core.constants import QUERY_BATCH_SIZE
from kili.core.helpers import validate_category_search_query
from kili.core.helpers import (
get_response_json,
log_raise_for_status,
validate_category_search_query,
)
from kili.core.utils.pagination import batcher
from kili.domain.asset import AssetFilters, AssetId
from kili.domain.project import ProjectId
Expand Down Expand Up @@ -187,7 +191,8 @@ def is_geotiff_asset_with_lat_lon_coords(asset: Dict, http_client: HttpClient) -

if isinstance(asset["jsonContent"], str) and asset["jsonContent"].startswith("http"):
response = http_client.get(asset["jsonContent"], timeout=30)
json_content = response.json()
log_raise_for_status(response)
json_content = get_response_json(response)

else:
json_content = asset["jsonContent"]
Expand Down
19 changes: 13 additions & 6 deletions src/kili/use_cases/asset/media_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from tenacity.wait import wait_random

from kili.adapters.http_client import HttpClient
from kili.core.helpers import get_response_json, log_raise_for_status
from kili.domain.asset import AssetExternalId
from kili.domain.project import ProjectId
from kili.domain.types import ListOrTuple
Expand Down Expand Up @@ -125,8 +126,10 @@ def download_assets(self, assets: List[Dict]) -> List[Dict]:
def download_single_asset(self, asset: Dict) -> Dict[str, Any]:
"""Download single asset on disk and modify asset attributes."""
if "ocrMetadata" in asset and str(asset["ocrMetadata"]).startswith("http"):
response = self.http_client.get(asset["ocrMetadata"], timeout=20).json()
asset["ocrMetadata"] = response
response = self.http_client.get(asset["ocrMetadata"], timeout=20)
log_raise_for_status(response)
json_content = get_response_json(response)
asset["ocrMetadata"] = json_content

if "jsonContent" in asset and str(asset["jsonContent"]).startswith("http"):
# richtext
Expand All @@ -137,8 +140,10 @@ def download_single_asset(self, asset: Dict) -> Dict[str, Any]:

# video frames
elif self.project_input_type == "VIDEO":
response = self.http_client.get(asset["jsonContent"], timeout=20).json()
urls = tuple(response.values())
response = self.http_client.get(asset["jsonContent"], timeout=20)
log_raise_for_status(response)
json_content = get_response_json(response)
urls = tuple(json_content.values())
nbr_char_zfill = len(str(len(urls)))
img_names = (
f'{asset["externalId"]}_{f"{i+1}".zfill(nbr_char_zfill)}'
Expand All @@ -158,8 +163,10 @@ def download_single_asset(self, asset: Dict) -> Dict[str, Any]:
# big images
elif self.project_input_type == "IMAGE":
# the "jsonContent" contains some information but not the image
response = self.http_client.get(asset["jsonContent"], timeout=20).json()
asset["jsonContent"] = response
response = self.http_client.get(asset["jsonContent"], timeout=20)
log_raise_for_status(response)
json_content = get_response_json(response)
asset["jsonContent"] = json_content

else:
raise NotImplementedError(
Expand Down

0 comments on commit 707412c

Please sign in to comment.