Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (4,) + inhomogeneous part. #213

Open
otakarkuchar opened this issue Jul 22, 2024 · 0 comments

Comments

@otakarkuchar
Copy link

otakarkuchar commented Jul 22, 2024

Had this issue.

In `detect_text`, inside `get_prediction`, the error is raised by this line:

polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

which calls the current implementation of the helper:

def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
    """Scale every polygon in ``polys`` by the resize ratios (failing version).

    Each entry that is not ``None`` is multiplied by
    ``(ratio_w * ratio_net, ratio_h * ratio_net)``. When ``polys`` is empty it
    is returned untouched; otherwise the converted NumPy array is returned.

    NOTE(review): this is the implementation that produces the reported
    "inhomogeneous shape" error; it is quoted here unchanged for reference.
    """
    if len(polys) > 0:
        # This conversion is where the error originates: per this report one
        # polygon had shape (14, 2) while the others had (4, 2), so the list
        # cannot be packed into a single regular array.
        polys = np.array(polys)
        for k in range(len(polys)):
            if polys[k] is not None:
                # First factor is derived from ratio_w, second from ratio_h.
                polys[k] *= (ratio_w * ratio_net, ratio_h * ratio_net)
    return polys

I fixed it by replacing that function with:

def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
    """Scale each polygon by the resize ratios without stacking them.

    Polygons are scaled one at a time, so entries with different numbers of
    points (e.g. shapes (4, 2) and (14, 2)) never have to be packed into a
    single NumPy array -- the operation that raised the "inhomogeneous shape"
    error.

    Args:
        polys: Sequence of polygons. Each entry is either ``None`` or an
            array-like whose last axis holds two coordinates.
        ratio_w: Scale factor applied to the first coordinate.
        ratio_h: Scale factor applied to the second coordinate.
        ratio_net: Extra factor applied to both coordinates (default 2).

    Returns:
        A list with exactly one entry per input entry, in the same order.
        ``None`` entries are kept as ``None`` so the result stays
        index-aligned with any parallel sequence (callers rely on this to
        substitute a fallback for missing polygons); every other entry is a
        new ``numpy.ndarray`` holding the scaled coordinates. An empty input
        yields an empty list. The input is not modified.
    """
    scale = (ratio_w * ratio_net, ratio_h * ratio_net)
    return [
        None if poly is None else np.array(poly) * scale
        for poly in polys
    ]

and by updating `get_prediction` as follows:

def get_prediction(
    image,
    craft_net,
    refine_net=None,
    text_threshold: float = 0.7,
    link_threshold: float = 0.4,
    low_text: float = 0.4,
    cuda: bool = False,
    long_size: int = 1280,
    poly: bool = True,
):
    """Run text detection on one image and return boxes, polygons and timings.

    Args:
        image: Input handed to ``image_utils.read_image``.
        craft_net: Detection network; called as ``craft_net(x)`` and expected
            to return ``(y, feature)``.
        refine_net: Optional link refiner; when given, its output replaces the
            link score map.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: Move the input tensor to the GPU before the forward pass.
        long_size: Target size passed to ``image_utils.resize_aspect_ratio``.
        poly: Forwarded to ``craft_utils.getDetBoxes``.

    Returns:
        dict with the keys ``"boxes"``, ``"boxes_as_ratios"``, ``"polys"``,
        ``"polys_as_ratios"``, ``"heatmaps"`` (text and link score heatmaps)
        and ``"times"`` (seconds spent in each stage).
    """
    t0 = time.time()

    # read/convert image
    image = image_utils.read_image(image)

    # resize
    img_resized, target_ratio, size_heatmap = image_utils.resize_aspect_ratio(
        image, long_size, interpolation=cv2.INTER_LINEAR
    )
    ratio_h = ratio_w = 1 / target_ratio
    resize_time = time.time() - t0
    t0 = time.time()

    # preprocessing
    x = image_utils.normalizeMeanVariance(img_resized)
    x = torch_utils.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = torch_utils.Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()
    preprocessing_time = time.time() - t0
    t0 = time.time()

    # forward pass
    with torch_utils.no_grad():
        y, feature = craft_net(x)
    craftnet_time = time.time() - t0
    t0 = time.time()

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # refine link
    if refine_net is not None:
        with torch_utils.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()
    refinenet_time = time.time() - t0
    t0 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(
        score_text, score_link, text_threshold, link_threshold, low_text, poly
    )

    # Fall back to the box wherever no polygon was produced. This is done
    # BEFORE the coordinate adjustment so the helper never sees a None entry:
    # a helper that skips None entries would otherwise shorten `polys` and
    # break its index alignment with `boxes`. Boxes and polygons are scaled by
    # the same ratios, so substituting first gives the same coordinates as
    # substituting afterwards.
    polys = [
        box if candidate is None else candidate
        for box, candidate in zip(boxes, polys)
    ]

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

    # get image size
    img_height = image.shape[0]
    img_width = image.shape[1]
    img_size = [img_width, img_height]

    # calculate box coords as ratios to image size
    boxes_as_ratio = np.array([box / img_size for box in boxes])

    # calculate poly coords as ratios to image size; dtype=object because
    # polygons may have different numbers of points
    polys_as_ratio = np.array(
        [polygon / img_size for polygon in polys], dtype=object
    )

    text_score_heatmap = image_utils.cvt2HeatmapImg(score_text)
    link_score_heatmap = image_utils.cvt2HeatmapImg(score_link)

    postprocess_time = time.time() - t0

    times = {
        "resize_time": resize_time,
        "preprocessing_time": preprocessing_time,
        "craftnet_time": craftnet_time,
        "refinenet_time": refinenet_time,
        "postprocess_time": postprocess_time,
    }

    return {
        "boxes": boxes,
        "boxes_as_ratios": boxes_as_ratio,
        "polys": polys,
        "polys_as_ratios": polys_as_ratio,
        "heatmaps": {
            "text_score_heatmap": text_score_heatmap,
            "link_score_heatmap": link_score_heatmap,
        },
        "times": times,
    }

"""
The issue lies in the inconsistent shapes of the polygons (polys). Specifically, Poly 3 has a shape of (14, 2), while the others have a shape of (4, 2). This inconsistency causes the error when attempting to convert the list of polygons to a NumPy array.

To handle this, we need to ensure that all polygons have a consistent shape or handle them differently if they are of varying shapes. We can either pad the polygons to the same size or handle them in a way that does not require converting them to a single NumPy array.
"""

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant