Reworked for keypoint detection; inspired by waspinator's pycococreator: https://github.com/waspinator/pycococreator

resize_binary_mask[source]

resize_binary_mask(array, new_size)

Returns a resized binary mask array.

def resize_binary_mask(array, new_size):
    """
    Resize a binary mask and return it as a boolean array.

    Args:
    - array: the binary mask as a numpy array.
    - new_size: the target size, passed unchanged to ``Image.resize``.

    Returns:
    The resized mask as a numpy array of dtype bool.
    """
    # Scale the 0/1 mask to 0/255 so it can be handled as an 8-bit image.
    scaled = array.astype(np.uint8) * 255
    resized = Image.fromarray(scaled).resize(new_size)
    # Every non-zero pixel of the resized image becomes True.
    return np.asarray(resized).astype(np.bool_)

close_contour[source]

close_contour(contour)

Returns a closed contour, i.e. the first and last elements of the contour are equal.

def close_contour(contour):
    """
    Return ``contour`` as a closed contour, i.e. one whose first and last
    points are equal.

    If the contour is already closed it is returned unchanged; otherwise a
    new array is returned with the first point appended at the end.
    """
    start_point, end_point = contour[0], contour[-1]
    if np.array_equal(start_point, end_point):
        return contour
    return np.vstack((contour, start_point))

binary_mask_to_polygon[source]

binary_mask_to_polygon(binary_mask, tolerance=0)

Converts a binary mask to COCO polygon representation.

Args:

  • binary_mask: a 2D binary numpy array where '1's represent the object.
  • tolerance: Maximum distance from original points of polygon to approximated polygonal chain. If tolerance is 0, the original coordinate array is returned.
def binary_mask_to_polygon(binary_mask, tolerance=0):
    """
    Converts a binary mask to COCO polygon representation.

    Args:
    - binary_mask: a 2D binary numpy array where '1's represent the object.
    - tolerance: Maximum distance from original points of polygon to
    approximated polygonal chain. If tolerance is 0, the original
    coordinate array is returned.

    Returns:
    A list of polygons, one per contour that has at least 3 points after
    approximation. Each polygon is a flat list of coordinates with negative
    values replaced by 0. The list is empty when no such contour exists.
    """
    polygons = []
    # pad mask to close contours of shapes which start and end at an edge
    padded_binary_mask = np.pad(
        binary_mask, pad_width=1, mode='constant', constant_values=0
    )
    for contour in measure.find_contours(padded_binary_mask, 0.5):
        # Undo the one-pixel padding offset contour by contour. Contours
        # generally differ in length, so subtracting from the whole list at
        # once would require building a ragged array, which NumPy rejects.
        contour = contour - 1
        contour = close_contour(contour)
        contour = measure.approximate_polygon(contour, tolerance)
        if len(contour) < 3:
            continue
        # Swap the two coordinate columns of every point before flattening
        # (presumably (row, col) -> (x, y); confirm against scikit-image docs).
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        # After padding and subtracting 1 we may get -0.5 points in our segmentation
        segmentation = [0 if i < 0 else i for i in segmentation]
        polygons.append(segmentation)

    return polygons

create_image_info[source]

create_image_info(image_id, file_name, image, date_captured='2022-05-09 15:32:55.711749', license_id=1, coco_url='', flickr_url='')

Returns the image information in JSON style format.

def create_image_info(
        image_id, file_name, image,
        date_captured=None,
        license_id=1, coco_url="", flickr_url=""):
    """
    Returns the image information in JSON style format.

    Args:
    - image_id: identifier stored under "id".
    - file_name: file name stored under "file_name".
    - image: object with a ``shape`` attribute; ``shape[0]`` is stored as
    "height" and ``shape[1]`` as "width".
    - date_captured: timestamp string stored under "date_captured". If None,
    the current UTC time is used, formatted as 'YYYY-MM-DD HH:MM:SS[.ffffff]'.
    - license_id: value stored under "license".
    - coco_url: value stored under "coco_url".
    - flickr_url: value stored under "flickr_url".

    Returns:
    A dictionary with the image information.
    """
    if date_captured is None:
        # Resolve the timestamp at call time. A default expression in the
        # signature is evaluated only once, when the function is defined,
        # which would stamp every image with the same import-time value.
        date_captured = datetime.datetime.now(
            datetime.timezone.utc).replace(tzinfo=None).isoformat(' ')

    image_info = {
        "id": image_id,
        "file_name": file_name,
        "width": image.shape[1],
        "height": image.shape[0],
        "date_captured": date_captured,
        "license": license_id,
        "coco_url": coco_url,
        "flickr_url": flickr_url
    }

    return image_info

create_annotation_info[source]

create_annotation_info(annotation_id, image_id, binary_mask, bounding_box, image_size=None, tolerance=2, keypoints=None)

Returns annotation information as a dictionary for COCO-keypoints in a JSON style format.

def create_annotation_info(
        annotation_id, image_id, binary_mask,
        bounding_box, image_size=None, tolerance=2, keypoints=None,
        num_keypoints=18, category_id=1):
    """
    Returns annotation information as a dictionary for COCO-keypoints in a
    JSON style format.

    Args:
    - annotation_id: identifier stored under "id".
    - image_id: identifier of the image the annotation belongs to.
    - binary_mask: binary numpy array marking the object.
    - bounding_box: object with a ``tolist()`` method (e.g. a numpy array);
    the result is stored under "bbox".
    - image_size: if not None, the mask is first resized to this size with
    ``resize_binary_mask``.
    - tolerance: polygon approximation tolerance passed to
    ``binary_mask_to_polygon``.
    - keypoints: value stored unchanged under "keypoints".
    - num_keypoints: value stored under "num_keypoints" (defaults to 18,
    the value that was previously hard-coded).
    - category_id: value stored under "category_id" (defaults to 1, the
    value that was previously hard-coded).

    Returns:
    The annotation dictionary, or None when no polygon could be extracted
    from the mask.
    """
    if image_size is not None:
        binary_mask = resize_binary_mask(binary_mask, image_size)

    # The mask is converted to a Fortran-ordered uint8 array before encoding.
    binary_mask_encoded = mask.encode(
        np.asfortranarray(binary_mask.astype(np.uint8)))
    area = mask.area(binary_mask_encoded)

    segmentation = binary_mask_to_polygon(binary_mask, tolerance)
    if not segmentation:
        # No usable contour, so there is nothing to annotate.
        return None

    annotation_info = {
       "segmentation": segmentation,
       "num_keypoints": num_keypoints,
       "area": area.tolist(),
       "iscrowd": 0,
       "keypoints": keypoints,
       "image_id": image_id,
       "bbox": bounding_box.tolist(),
       "category_id": category_id,
       "id": annotation_id,
    }

    return annotation_info