From 5e5bdd70fe81977ea287f91348aae17fe044780c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Sm=C3=B3=C5=82ka?= Date: Sat, 18 Jan 2025 01:22:36 +0100 Subject: [PATCH] Improved the chessboard detection commit-id:6662bc18 --- child_lab_framework/_cli/cli.py | 2 +- child_lab_framework/_procedure/calibrate.py | 13 +- .../task/camera/detection/chessboard.py | 606 ++++++++++++++++-- 3 files changed, 564 insertions(+), 57 deletions(-) diff --git a/child_lab_framework/_cli/cli.py b/child_lab_framework/_cli/cli.py index f2c68b4..4f43760 100644 --- a/child_lab_framework/_cli/cli.py +++ b/child_lab_framework/_cli/cli.py @@ -89,7 +89,7 @@ def calibrate( calibration = calibration_procedure.run( video_input, video_output, - chessboard.Properties(square_size, *inner_board_corners), + chessboard.BoardProperties(square_size, *inner_board_corners), skip, ) diff --git a/child_lab_framework/_procedure/calibrate.py b/child_lab_framework/_procedure/calibrate.py index b8077b4..d426a8f 100644 --- a/child_lab_framework/_procedure/calibrate.py +++ b/child_lab_framework/_procedure/calibrate.py @@ -1,5 +1,6 @@ import typing from pathlib import Path +from typing import Literal import cv2 import numpy as np @@ -9,7 +10,7 @@ from ..core.video import Format, Input, Reader, Writer from ..task.camera.detection import chessboard from ..task.visualization import Configuration, Visualizer -from ..typing.array import FloatArray1, FloatArray2, FloatArray3 +from ..typing.array import FloatArray1, FloatArray2 # TODO: Implement procedures as classes with `Iterable` protocol @@ -17,7 +18,7 @@ def run( video_source: Path, annotated_video_destination: Path, - board_properties: chessboard.Properties, + board_properties: chessboard.BoardProperties, skip: int, ) -> Calibration: reader = Reader( @@ -40,8 +41,8 @@ def run( detector = chessboard.Detector(board_properties) - inner_corners_per_row = board_properties.inner_corners_per_row - inner_corners_per_column = board_properties.inner_corners_per_column + inner_corners_per_row = board_properties.inner_columns + inner_corners_per_column = board_properties.inner_rows square_size = board_properties.square_size # Prepare object points, like (0,0,0), (1,0,0), (2,0,0). ...,(6,5,0) @@ -56,7 +57,9 @@ def run( ) object_points: list[FloatArray2] = [] - image_points: list[FloatArray3] = [] + image_points: list[ + np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]] + ] = [] for _ in trange(1, video_properties.length, skip, desc='Processing video'): frame = reader.read_skipping(skip) diff --git a/child_lab_framework/task/camera/detection/chessboard.py b/child_lab_framework/task/camera/detection/chessboard.py index 30e0219..170b566 100644 --- a/child_lab_framework/task/camera/detection/chessboard.py +++ b/child_lab_framework/task/camera/detection/chessboard.py @@ -1,26 +1,305 @@ +import math import typing -from dataclasses import dataclass +from collections.abc import Sequence +from dataclasses import dataclass, field +from typing import ClassVar, Literal import cv2 as opencv import numpy as np from ....core import video -from ....typing.array import FloatArray2, FloatArray3 +from ....typing.array import ByteArray2, FloatArray1 from ....typing.video import Frame from ... 
import visualization +from ...visualization import annotation +# +------------------------------------------------------------------------------------+ +# | Algorithms and parts of code adopted from: | +# | Repository: https://github.com/ros-perception/image_pipeline | +# | Files: | +# | - `image_pipeline/camera_calibration/src/camera_calibration/calibrator.py` | +# | - `image_pipeline/camera_calibration/src/camera_calibration/mono_calibrator.py` | +# | Commit: 722ca08b98f37b7b148d429753da133ff1e2c7cf | +# +------------------------------------------------------------------------------------+ + + +@dataclass(frozen=True, slots=True) +class BoardProperties: + """ + Physical properties of the chessboard. + """ -@dataclass(frozen=True) -class Properties: square_size: float - inner_corners_per_row: int - inner_corners_per_column: int + inner_rows: int + inner_columns: int + + @property + def rigid_model(self) -> np.ndarray[tuple[int, Literal[2]], np.dtype[np.float32]]: + """ + 3D coordinates of the inner chessboard corners. + """ + + inner_rows = self.inner_rows + inner_columns = self.inner_columns + + # How they do it in ros_image_pipeline: + # total_points = inner_rows * inner_columns + # object_points = np.zeros((total_points, 1, 3), np.float32) + + # for j in range(total_points): + # object_points[j, 0, 0] = j // inner_columns + # object_points[j, 0, 1] = j % inner_columns + # object_points[j, 0, 2] = 0 + # object_points[j, 0, :] *= self.square_size + + model = np.zeros( + (inner_rows * inner_columns, 3), + np.float32, + ) + + corner_grid = np.mgrid[0:inner_columns, 0:inner_rows] + + # Flip the grid to have a "natural" orientation of axes and start from lower_left = (0, 0) + model[:, :2] = np.flipud(corner_grid.T).reshape(-1, 2) * self.square_size + + return model + + +@dataclass(init=False, slots=True) +class AggregatedDetectionProperties: + """ + Representation of `DetectionProperties` gathered from multiple detections, + used to describe the heuristic qualities of the whole detection process. + """ + + __data: np.ndarray[tuple[int, Literal[5]], np.dtype[np.float32]] + + __AREA_WEIGHT: ClassVar[float] = 0.0 + __SKEW_WEIGHT: ClassVar[float] = 2.0 + __X_OFFSET_WEIGHT: ClassVar[float] = 1.4 + __Y_OFFSET_WEIGHT: ClassVar[float] = 1.4 + __PERSPECTIVE_OFFSET_WEIGHT: ClassVar[float] = 2.5 + + __PROGRESS_WEIGHTS: ClassVar[np.ndarray[Literal[5], np.dtype[np.float32]]] = np.array( + ( + __AREA_WEIGHT, + __SKEW_WEIGHT, + __X_OFFSET_WEIGHT, + __Y_OFFSET_WEIGHT, + __PERSPECTIVE_OFFSET_WEIGHT, + ) + ) + + def __init__(self, results: Sequence['DetectionProperties']) -> None: + n = len(results) + + data = np.empty((n, 5), dtype=np.float32) + + for i, item in enumerate(results): + data[i, 0] = item.area + data[i, 1] = item.skew + data[i, 2] = item.x_offset + data[i, 3] = item.y_offset + data[i, 4] = item.perspective_offset + + self.__data = data + + def mean(self) -> 'DetectionProperties': + """ + Compute the mean properties and store them as a `DetectionProperties`. + """ + + # use .tolist to convert np.float32 to Python's float + return DetectionProperties(*np.mean(self.__data, axis=0).flatten().tolist()) + + def progress(self) -> 'DetectionProperties': + """ + Estimate the overall progress of the calibration + based on the ranges of parameters. 
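+
+        For every parameter the spread between its largest and smallest observed
+        value is scaled by a fixed weight; a wider spread means the detections
+        cover more of that dimension. Following the ROS calibrator, the minima of
+        the Y-offset and perspective-offset columns are clamped to zero before the
+        spread is computed.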
+ """ + + data = self.__data + + max = np.max(data, axis=0).flatten() + min = np.min(data, axis=0).flatten() + + # From ROS: + min[3] = 0.0 + min[4] = 0.0 + + value_range: FloatArray1 = max - min + progress = self.__PROGRESS_WEIGHTS * value_range + + # use .tolist to convert np.float32 to Python's float + return DetectionProperties(*progress.tolist()) + + +@dataclass(frozen=True, slots=True) +class DetectionProperties: + """ + Heuristic properties of the chessboard detection, + describing its orientation relative to the camera. + + Attributes + --- + area: float + Area of the projection of the board. + + skew: float + Angle between the pair of adjacent edges. + + x_offset: float + Displacement along the X-axis. + + y_offset: float + Displacement along the Y-axis. + + perspective_offset: float + Displacement in the camera's perspective, dependent on the size of the projection. + """ + + area: float + skew: float + x_offset: float + y_offset: float + perspective_offset: float + + def distance(self, other: 'DetectionProperties') -> float: + """ + Compute the Manhattan distance between two observations + in the space of values stored in `DetectionProperties`. + """ + + return ( + abs(self.skew - other.skew) + + abs(self.x_offset - other.x_offset) + + abs(self.y_offset - other.y_offset) + + abs(self.perspective_offset - other.perspective_offset) + ) -@dataclass(frozen=True) +@dataclass(slots=True) class Result: + """ + Description of the chessboard detection. + + Attributes + --- corners: FloatArray3 - properties: Properties # TODO: delete this field as soon as custom drawing procedure is implemented + `n_detection x 1 x 2` array containing the positions of the detected inner corners. + + board_properties: BoardProperties + Properties of the detected board. + + detection_properties: DetectionProperties + Additional information about the detection in relation to the camera. + """ + + corners: np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]] + board_properties: BoardProperties + detection_properties: DetectionProperties + + def __init__( + self, + corners: np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]], + board_properties: BoardProperties, + frame_width: int, + frame_height: int, + ) -> None: + self.corners = corners + self.board_properties = board_properties + self.detection_properties = self.__detection_properties(frame_width, frame_height) + + def __detection_properties( + self, + frame_width: int, + frame_height: int, + ) -> DetectionProperties: + """ + Calculate the properties describing the heuristic quality of the result. + """ + skew = self.skew + area = self.area + + border = math.sqrt(area) + + x_mean = float(np.mean(self.corners[..., 0])) + y_mean = float(np.mean(self.corners[..., 1])) + + x_offset = min(1.0, max(0.0, (x_mean - 0.5 * border) / (frame_width - border))) + y_offset = min(1.0, max(0.0, (y_mean - 0.5 * border) / (frame_height - border))) + perspective_offset = area / float(frame_width * frame_height) + + return DetectionProperties( + area, + skew, + x_offset, + y_offset, + perspective_offset, + ) + + @property + def area(self) -> float: + """ + Calculate the area of the board. + """ + # Assumes the board is a convex quadrilateral. 
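+        # For a convex quadrilateral the area is half the magnitude of the
+        # cross product of its diagonals: area = |d1 x d2| / 2.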
+
+        upper_left, upper_right, lower_left, lower_right = self.outer_corners
+
+        diagonal_1_x, diagonal_1_y = lower_right - upper_left
+        diagonal_2_x, diagonal_2_y = lower_left - upper_right
+
+        return 0.5 * abs(float(diagonal_1_x * diagonal_2_y - diagonal_1_y * diagonal_2_x))
+
+    @property
+    def skew(self) -> float:
+        """
+        Calculate the skew of the board: how far the angle between its top
+        and left edges deviates from a right angle, normalized to [0, 1].
+        """
+
+        upper_left, upper_right, lower_left, _ = self.outer_corners
+
+        top_edge = upper_right - upper_left
+        left_edge = lower_left - upper_left
+
+        dot_product = np.dot(top_edge, left_edge)
+        norm_product = np.linalg.norm(top_edge) * np.linalg.norm(left_edge)
+        angle = float(np.arccos(dot_product / norm_product))
+
+        return min(1.0, 2.0 * abs(np.pi / 2.0 - angle))
+
+    @property
+    def outer_corners(
+        self,
+    ) -> tuple[FloatArray1, FloatArray1, FloatArray1, FloatArray1]:
+        """
+        Outer corners of the board, in the following order:
+        upper-left, upper-right, lower-left, lower-right.
+        """
+
+        rows = self.board_properties.inner_rows
+        columns = self.board_properties.inner_columns
+
+        corners = self.corners
+
+        return (
+            corners[0, 0],
+            corners[columns - 1, 0],
+            corners[(rows - 1) * columns, 0],
+            corners[rows * columns - 1, 0],
+        )
+
+    def average_speed(self, previous: 'Result', time_delta: float) -> float:
+        """
+        Calculate the approximate average speed of the board's corners,
+        assuming that `previous` was detected `time_delta` seconds ago.
+        """
+
+        return (
+            float(np.mean(np.linalg.norm(self.corners - previous.corners, axis=-1)))
+            / time_delta
+        )
 
     def visualize(
         self,
@@ -28,81 +307,306 @@ def visualize(
         frame_properties: video.Properties,
         configuration: visualization.Configuration,
    ) -> Frame:
+        """
+        Draw the inner corners of the chessboard with colors depending on their order.
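+        Additionally, outline the detected board area together with its perspective
+        offset, label the four outer corners and annotate every inner corner with
+        its rigid-model coordinates.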
+ """ + if not configuration.chessboard_draw_corners: return frame - properties = self.properties + properties = self.board_properties pattern_shape = ( - properties.inner_corners_per_row, - properties.inner_corners_per_column, + properties.inner_columns, + properties.inner_rows, ) - # TODO: implement custom drawing opencv.drawChessboardCorners(frame, pattern_shape, self.corners, True) - return frame + upper_left, upper_right, lower_left, lower_right = self.outer_corners + area_vertices = np.stack( + ( + upper_left.astype(np.int32), + upper_right.astype(np.int32), + lower_right.astype(np.int32), + lower_left.astype(np.int32), + ) + ) -class Detector: - properties: Properties - square_size: float - inner_corners_per_row: int - inner_corners_per_column: int + distance = self.detection_properties.perspective_offset - termination_criteria: opencv.typing.TermCriteria - object_points_template: FloatArray2 + annotation.draw_polygon_with_description( + frame, + area_vertices, + f'{distance = :.2f}', + area_opacity=0.15, + font_scale=1.5, + font_thickness=2, + box_opacity=0.30, + ) - def __init__( - self, - properties: Properties, - *, - termination_criteria: opencv.typing.TermCriteria = ( + annotation.draw_point_with_description( + frame, + upper_left.astype(int).tolist(), + 'upper left', + text_location='above', + ) + + annotation.draw_point_with_description( + frame, + upper_right.astype(int).tolist(), + 'upper right', + text_location='above', + ) + + annotation.draw_point_with_description( + frame, + lower_left.astype(int).tolist(), + 'lower left', + ) + + annotation.draw_point_with_description( + frame, + lower_right.astype(int).tolist(), + 'lower right', + ) + + for corner, rigid_point in zip( + np.squeeze(self.corners), + np.squeeze(self.board_properties.rigid_model), + ): + x, y = corner.astype(int) + rx, ry, _ = rigid_point + annotation.draw_point_with_description( + frame, + (x, y), + f'({rx:.2f}, {ry:.2f})', + font_scale=0.25, + ) + + return frame + + +@dataclass(frozen=True, slots=True) +class Detector: + board_properties: BoardProperties + termination_criteria: opencv.typing.TermCriteria = field( + default=( opencv.TERM_CRITERIA_EPS + opencv.TERM_CRITERIA_MAX_ITER, 30, 0.001, - ), - ) -> None: - self.properties = properties - self.square_size = properties.square_size - self.inner_corners_per_row = properties.inner_corners_per_row - self.inner_corners_per_column = properties.inner_corners_per_column + ) + ) - # Termination criteria for corner sub-pixel refinement - self.termination_criteria = termination_criteria + # TODO: take grayscale frame as an argument to avoid conversion + def predict(self, frame: Frame) -> Result | None: + """ + Detect the board in the frame. + """ - # Prepare object points, like (0,0,0), (1,0,0), (2,0,0). 
...,(6,5,0) - self.object_points_template = np.zeros( - (properties.inner_corners_per_column * properties.inner_corners_per_row, 3), - np.float32, + gray_frame: ByteArray2 = typing.cast( + ByteArray2, + opencv.cvtColor(frame, opencv.COLOR_RGB2GRAY), ) - self.object_points_template[:, :2] = np.mgrid[ - 0 : properties.inner_corners_per_row, 0 : properties.inner_corners_per_column - ].T.reshape(-1, 2) + downscaled_gray_frame, area_scale = self.__downscale(gray_frame) - # TODO: take grayscale frame as an argument to avoid conversion - def predict(self, frame: Frame) -> Result | None: - grayscale_frame = opencv.cvtColor(frame, opencv.COLOR_RGB2GRAY) - properties = self.properties + original_height, original_width, _ = frame.shape + height, width = downscaled_gray_frame.shape + + board_properties = self.board_properties found, corners_dirty = opencv.findChessboardCorners( - grayscale_frame, - (self.inner_corners_per_row, self.inner_corners_per_column), + downscaled_gray_frame, + (board_properties.inner_columns, board_properties.inner_rows), None, + opencv.CALIB_CB_ADAPTIVE_THRESH + | opencv.CALIB_CB_NORMALIZE_IMAGE + | opencv.CALIB_CB_FAST_CHECK, ) if not found: return None - corners_dirty = opencv.cornerSubPix( - grayscale_frame, + corners = typing.cast( + np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]], corners_dirty, - (11, 11), + ) + + if self.__is_close_to_edge(corners, 10.0, width, height): + return None + + corners = self.__normalize_orientation(corners, board_properties) + corners = self.__refine(downscaled_gray_frame, corners) + corners = self.__upscale( + gray_frame, + corners, + original_width / width, + original_height / height, + area_scale, + ) + + return Result( + corners, + board_properties, + original_width, + original_height, + ) + + def __downscale(self, frame: ByteArray2) -> tuple[ByteArray2, float]: + """ + Resize the frame to a size comparable to 640 x 480 px, preserving the aspect ratio. + + Returns + --- + result: tuple[Frame, float] + The resized frame and the scale used. + """ + + height, width = frame.shape + scale = math.sqrt(307_200.0 / (height * width)) + + new_size = ( + int(math.ceil(width * scale)), + int(math.ceil(height * scale)), + ) + + resized_frame = opencv.resize(frame, new_size) + + return typing.cast(ByteArray2, resized_frame), scale + + def __upscale( + self, + original_frame: ByteArray2, + corners: np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]], + x_scale: float, + y_scale: float, + area_scale: float, + ) -> np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]]: + """ + Upscale the corner coordinates with `x_scale` and `y_scale` along respective axes + and perform sub-pixel correction. + + Parameters + --- + original_frame: ByteArray2 + Original, non-scaled gray-scale frame. + + corners: FloatArray3 + Corners of the board detected on the downsampled frame. + + x_scale: float + Scale for the corner coordinates along X-axis. + + y_scale: float + Scale for the corner coordinates along Y-axis. + + area_scale: float + Scale obtained from `_downscale`. + + termination_criteria: opencv.typing.TermCriteria + Termination criteria from the sub-pixel refinement. 
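+
+        Returns
+        ---
+        corners: np.ndarray
+            Corner coordinates mapped back to the original frame resolution and
+            refined with sub-pixel accuracy.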
+ """ + + corners[..., 0] *= x_scale + corners[..., 1] *= y_scale + + radius = int(math.ceil(area_scale)) + + corners_dirty = opencv.cornerSubPix( + original_frame, + corners, + (radius, radius), (-1, -1), self.termination_criteria, ) - corners = typing.cast(list[FloatArray2], corners_dirty) + return corners_dirty # type: ignore + + def __is_close_to_edge( + self, + corners: np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]], + margin: float, + frame_width: int, + frame_height: int, + ) -> bool: + """ + Check if the detected board is closer to the edges of the frame than `margin`. + """ + + xs_lower = corners[:, 0, 0] <= margin + ys_lower = corners[:, 0, 1] <= margin + xs_greater = corners[:, 0, 0] >= (float(frame_width) - margin) + ys_greater = corners[:, 0, 1] >= (float(frame_height) - margin) + + return bool(np.any(xs_lower | ys_lower | xs_greater | ys_greater)) + + def __normalize_orientation( + self, + corners: np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]], + board: BoardProperties, + ) -> np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]]: + """ + Sort the `corners` increasingly and reshape them to `n_detections x board_width x board_height`. + """ + + rows = board.inner_rows + columns = board.inner_columns + + if rows != columns: + if corners[0, 0, 1] > corners[-1, 0, 1]: + return np.ascontiguousarray(np.flipud(corners)) + + return corners + + direction_indicator: list[bool] = np.squeeze( + (corners[-1] - corners[0]) >= 0.0 + ).tolist() + + match direction_indicator: + case [True, True]: + return corners + + case [True, False]: + corners_2d_grid = corners.reshape(rows, columns, 2) + corners_rotated = np.rot90(corners_2d_grid).reshape(-1, 1, 2) + return np.ascontiguousarray(corners_rotated) + + case [False, True]: + corners_2d_grid = corners.reshape(rows, columns, 2) + corners_rotated = np.rot90(corners_2d_grid, 3).reshape(-1, 1, 2) + return np.ascontiguousarray(corners_rotated) + + case [False, False]: + return np.ascontiguousarray(np.flipud(corners)) + + assert False, 'unreachable' + + def __refine( + self, + frame: ByteArray2, + corners: np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]], + ) -> np.ndarray[tuple[int, Literal[1], Literal[2]], np.dtype[np.float32]]: + """ + Correct the corner displacement according to the mutual distance between them. + """ + + xs: FloatArray1 = corners[..., 0].reshape(1, -1) + ys: FloatArray1 = corners[..., 1].reshape(1, -1) + + pairwise_distance = np.sqrt((xs - xs.T) ** 2 + (ys - ys.T) ** 2) + np.fill_diagonal(pairwise_distance, np.inf) + + minimal_distance = np.min(pairwise_distance) + radius = int(np.ceil(0.5 * minimal_distance)) + + corners_dirty = opencv.cornerSubPix( + frame, + corners, + (radius, radius), + (-1, -1), + self.termination_criteria, + ) - return Result(np.stack(corners), properties) + return corners_dirty # type: ignore
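
Usage note (illustrative, not part of the patch): the sketch below shows how the new
detection heuristics could be combined to collect a diverse set of calibration frames,
in the spirit of the ROS calibrator the algorithms were adopted from. The
`collect_diverse_detections` helper and the 0.2 distance threshold are assumptions made
for this example and do not exist in the repository.

    from collections.abc import Iterable

    from child_lab_framework.task.camera.detection import chessboard
    from child_lab_framework.typing.video import Frame


    def collect_diverse_detections(
        frames: Iterable[Frame],
        board: chessboard.BoardProperties,
        minimal_distance: float = 0.2,
    ) -> list[chessboard.Result]:
        detector = chessboard.Detector(board)
        kept: list[chessboard.Result] = []

        for frame in frames:
            result = detector.predict(frame)

            if result is None:
                continue

            properties = result.detection_properties

            # Accept the detection only if it is far enough, in the Manhattan metric
            # implemented by `DetectionProperties.distance`, from every detection
            # accepted so far.
            if all(
                properties.distance(other.detection_properties) > minimal_distance
                for other in kept
            ):
                kept.append(result)

        return kept

`AggregatedDetectionProperties([r.detection_properties for r in kept]).progress()` then
gives a rough estimate of how well the accepted frames cover the calibration parameter
space.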