Pipeline

import albumentations as A
import cv2
import numpy as np

# Assume 'image' is loaded as a NumPy array (e.g., 100x100x3)
image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8) # Dummy image

# 1. Define the pipeline
pipeline = A.Compose([
    A.HorizontalFlip(p=0.5), # 50% chance to flip
    A.RandomBrightnessContrast(p=0.8), # 80% chance to adjust brightness/contrast
    A.GaussianBlur(p=0.3), # 30% chance to blur
])

# 2. Apply the pipeline
transformed_data = pipeline(image=image)
transformed_image = transformed_data['image']

print(f"Original shape: {image.shape}, Transformed shape: {transformed_image.shape}")
# Note: Shape usually remains the same unless a spatial transform like Resize is used.
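As the note says, a spatial transform changes the output shape. A minimal sketch using A.Resize (the 64x64 target is arbitrary):

resize_pipeline = A.Compose([
    A.Resize(height=64, width=64),  # Spatial transform: output becomes 64x64
    A.HorizontalFlip(p=0.5),
])

resized = resize_pipeline(image=image)['image']
print(f"Resized shape: {resized.shape}")  # (64, 64, 3)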

Bounding Box Formats by Model

pascal_voc: [x_min, y_min, x_max, y_max] in absolute pixel coordinates. (x_min, y_min) is the top-left corner, and (x_max, y_max) is the bottom-right corner.
albumentations: Similar to pascal_voc, but uses normalized coordinates: [normalized_x_min, normalized_y_min, normalized_x_max, normalized_y_max]. These are calculated as x_pixel / image_width and y_pixel / image_height.
coco: [x_min, y_min, bbox_width, bbox_height] in absolute pixel coordinates. (x_min, y_min) is the top-left corner.
yolo: [normalized_x_center, normalized_y_center, normalized_bbox_width, normalized_bbox_height]. These are normalized coordinates.
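Because each format encodes the same box differently, converting between them is a common preprocessing step. Below is a minimal sketch for a single coco box; the helper names are illustrative, not part of Albumentations:

def coco_to_pascal_voc(bbox):
    # [x_min, y_min, w, h] -> [x_min, y_min, x_max, y_max]
    x_min, y_min, w, h = bbox
    return [x_min, y_min, x_min + w, y_min + h]

def coco_to_yolo(bbox, image_width, image_height):
    # [x_min, y_min, w, h] -> normalized [x_center, y_center, w, h]
    x_min, y_min, w, h = bbox
    return [(x_min + w / 2) / image_width,
            (y_min + h / 2) / image_height,
            w / image_width,
            h / image_height]

print(coco_to_pascal_voc([23, 74, 295, 388]))      # [23, 74, 318, 462]
print(coco_to_yolo([23, 74, 295, 388], 640, 480))  # normalized values in [0, 1]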

Defining the Augmentation Pipeline

import albumentations as A
import cv2
import numpy as np

# Example training pipeline with bounding-box support
train_transform = A.Compose([
    A.RandomCrop(width=450, height=450, p=1.0),  # Always crop to 450x450
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
], bbox_params=A.BboxParams(format='coco',                # Input bbox format
                            label_fields=['class_labels']  # Keyword name(s) for labels
                            ))
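A.BboxParams also accepts min_area and min_visibility, which drop boxes (and their labels) that become too small or too occluded after a crop. A short sketch; the threshold values are illustrative:

# Boxes smaller than 1024 px^2, or with less than 30% of their original
# area remaining after the crop, are removed along with their labels.
filtered_transform = A.Compose([
    A.RandomCrop(width=450, height=450, p=1.0),
], bbox_params=A.BboxParams(
    format='coco',
    label_fields=['class_labels'],
    min_area=1024,       # Absolute pixel-area threshold (illustrative value)
    min_visibility=0.3,  # Fraction of original box area that must survive
))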

Loading the Image and Bounding Boxes

Load the image (e.g., as an RGB NumPy array) and prepare the bounding box data.

# Load Image (cv2.imread returns None if the path is invalid)
image_path = "/path/to/your/image.jpg"
image = cv2.imread(image_path)
assert image is not None, f"Failed to load image: {image_path}"
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB

# Prepare Bounding Boxes (example using 'coco' format)
# Each row is [x_min, y_min, bbox_width, bbox_height]
bboxes = np.array([
    [23, 74, 295, 388],
    [377, 294, 252, 161],
    [333, 421, 49, 49],
], dtype=np.float32)

# Prepare Labels (using the name specified in label_fields)
class_labels = np.array(['dog', 'cat', 'sports ball'])
# Example with multiple label fields if defined in BboxParams:
# class_categories = np.array(['animal', 'animal', 'item'])
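In practice the boxes usually come from an annotation file rather than hard-coded arrays. A minimal sketch for a COCO-style JSON file; the path and image id are assumptions about your dataset, not Albumentations API:

import json

# Hypothetical COCO-style annotation file; adjust keys to your dataset.
with open("/path/to/annotations.json") as f:
    coco = json.load(f)

# Collect [x_min, y_min, w, h] boxes and label ids for one image id.
target_image_id = 1  # illustrative
bboxes = np.array(
    [ann["bbox"] for ann in coco["annotations"] if ann["image_id"] == target_image_id],
    dtype=np.float32,
)
class_labels = np.array(
    [ann["category_id"] for ann in coco["annotations"] if ann["image_id"] == target_image_id]
)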

Applying the Pipeline

Pass the image, the bounding boxes, and the corresponding label list, using the keyword argument name(s) defined in label_fields.

# Using train_transform defined earlier which has label_fields=['class_labels']
augmented = train_transform(image=image, bboxes=bboxes, class_labels=class_labels)

transformed_image = augmented['image']
transformed_bboxes = augmented['bboxes']
# Access transformed labels using the key from label_fields
transformed_class_labels = augmented['class_labels']

# If multiple label fields were defined (e.g., label_fields=['class_labels', 'category_id'])
# and passed like: transform(..., class_labels=..., category_id=...)
# then access them: transformed_category_ids = augmented['category_id']
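Depending on the Albumentations version, the transformed bboxes may come back as a list of tuples rather than a NumPy array, and boxes can be dropped entirely by aggressive crops. A small optional sanity check (a sketch, not required API usage):

# Coerce the output back to an array; handle the all-boxes-cropped-out case.
transformed_bboxes = np.asarray(transformed_bboxes, dtype=np.float32)
if transformed_bboxes.size == 0:
    transformed_bboxes = transformed_bboxes.reshape(0, 4)

print(f"{len(bboxes)} boxes in, {len(transformed_bboxes)} boxes out")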

Visualization

Visualize the outputs before A.Normalize and ToTensorV2 (from albumentations.pytorch) are applied, since normalized, channel-first tensors do not display well with Matplotlib.

import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
from albumentations.pytorch import ToTensorV2

# Helper function to draw bounding boxes. Handles 'coco' ([x, y, w, h]) and
# corner-style formats such as 'pascal_voc' ([x_min, y_min, x_max, y_max]).
def draw_bboxes(image_np, bboxes, labels, bbox_format='pascal_voc',
                class_name_map=None, color=(0, 255, 0), thickness=2):
    img_res = image_np.copy()
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    font_thickness = 1

    # Albumentations may return bboxes/labels as plain lists; coerce so both work
    bboxes = list(bboxes)
    labels = np.asarray(labels).astype(str)
    if len(bboxes) != len(labels):
        print("Warning: bbox and label length mismatch, drawing placeholder labels.")
        labels = np.array(['?'] * len(bboxes))

    for bbox, label in zip(bboxes, labels):
        if len(bbox) < 4:
            print(f"Warning: Skipping invalid bbox (fewer than 4 coords): {bbox}")
            continue
        if bbox_format == 'coco':
            # [x_min, y_min, width, height] -> corner coordinates
            x_min, y_min, w, h = bbox[:4]
            x_max, y_max = x_min + w, y_min + h
        else:
            # Assumes corner coords (pascal_voc, or albumentations after denormalizing)
            x_min, y_min, x_max, y_max = bbox[:4]
        try:
            x_min, y_min, x_max, y_max = map(int, (x_min, y_min, x_max, y_max))
        except (ValueError, TypeError) as e:
            print(f"Warning: Could not convert bbox coords to int: {bbox}, Error: {e}")
            continue  # Skip this bbox

        cv2.rectangle(img_res, (x_min, y_min), (x_max, y_max), color, thickness)

        label_name = str(label) if class_name_map is None else class_name_map.get(label, str(label))
        # Place the text above the box if it fits, otherwise inside it
        (text_width, text_height), baseline = cv2.getTextSize(label_name, font, font_scale, font_thickness)
        text_y = y_min - baseline if y_min - baseline > text_height else y_min + text_height
        cv2.putText(img_res, label_name, (x_min, text_y), font, font_scale, color, font_thickness)

    return img_res

def visualize_bbox_augmentations(image, bboxes, labels, transform, samples=5):
    """Visualizes the original image alongside several augmented versions."""
    # Build a visualization pipeline with Normalize / ToTensorV2 stripped out
    if isinstance(transform, A.Compose):
        vis_transform_list = [
            t for t in transform.transforms
            if not isinstance(t, (A.Normalize, ToTensorV2))
        ]
        # Recreate Compose, reusing the original bbox_params if present
        bbox_processor = transform.processors.get('bboxes')
        bbox_params = bbox_processor.params if bbox_processor else None
        vis_transform = A.Compose(vis_transform_list, bbox_params=bbox_params)
    else:
        print("Cannot strip Normalize/ToTensorV2: transform is not an A.Compose instance.")
        vis_transform = transform  # Fall back to the original transform

    if 'bboxes' not in getattr(vis_transform, 'processors', {}):
        print("Cannot visualize: the pipeline needs A.BboxParams.")
        return

    # Read the bbox format and label_fields once, from the pipeline itself
    bbox_processor = vis_transform.processors['bboxes']
    bbox_format = bbox_processor.params.format
    label_fields = bbox_processor.params.label_fields or []

    figure, ax = plt.subplots(1, samples + 1, figsize=(15, 5))

    # Draw the original image with its boxes
    original_drawn = draw_bboxes(image, bboxes, labels, bbox_format=bbox_format)
    ax[0].imshow(original_drawn)
    ax[0].set_title("Original")
    ax[0].axis("off")

    # Draw augmented samples
    for i in range(samples):
        try:
            # Pass labels under the keyword(s) declared in label_fields
            label_args = {field: labels for field in label_fields}
            augmented = vis_transform(image=image, bboxes=bboxes, **label_args)

            aug_image = augmented['image']
            aug_bboxes = augmented['bboxes']
            if label_fields:
                aug_labels = augmented[label_fields[0]]
            else:
                aug_labels = ['?' for _ in aug_bboxes]  # Placeholder if no labels

            augmented_drawn = draw_bboxes(aug_image, aug_bboxes, aug_labels, bbox_format=bbox_format)
            ax[i + 1].imshow(augmented_drawn)
            ax[i + 1].set_title(f"Augmented {i+1}")
        except Exception as e:
            print(f"Error during augmentation sample {i+1}: {e}")
            ax[i + 1].imshow(image)  # Show the original on error
            ax[i + 1].set_title(f"Aug Error {i+1}")
        finally:
            ax[i + 1].axis("off")

    plt.tight_layout()
    plt.show()

# --- Example Usage --- #
# Assuming 'image', 'bboxes', 'class_labels', and 'train_transform' are defined as in Step 3/4

# Load a sample image and annotations
# image = cv2.imread('your_image.jpg')
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# bboxes = [[...], [...]] # In 'coco' format for this example
# class_labels = ['label1', 'label2']

# Define the transform (must include bbox_params with correct format and label_fields)
# train_transform = A.Compose([
#     A.RandomCrop(width=450, height=450, p=1.0),
#     A.HorizontalFlip(p=0.5),
#     A.RandomBrightnessContrast(p=0.2),
#     # A.Normalize(...),  # Include if used; stripped by the visualize function
#     # ToTensorV2(),      # Include if used; stripped by the visualize function
# ], bbox_params=A.BboxParams(format='coco', label_fields=['class_labels']))

# Visualize
# visualize_bbox_augmentations(image, bboxes, class_labels, train_transform, samples=4)

Data Type Requirements