import albumentations as A
import cv2
import numpy as np
# Stand-in for a real image: a random uint8 array of shape 100x100x3.
image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)

# Step 1: list the transforms, each with its own probability `p` of being
# applied, then wrap them in a Compose pipeline.
augmentations = [
    A.HorizontalFlip(p=0.5),  # flipped with 50% probability
    A.RandomBrightnessContrast(p=0.8),  # brightness/contrast adjusted with 80% probability
    A.GaussianBlur(p=0.3),  # blurred with 30% probability
]
pipeline = A.Compose(augmentations)

# Step 2: call the pipeline with the image as a keyword argument; the
# augmented image is returned under the 'image' key of the result.
transformed_data = pipeline(image=image)
transformed_image = transformed_data['image']

# The shape normally stays unchanged unless a spatial transform such as
# Resize is part of the pipeline.
print(f"Original shape: {image.shape}, Transformed shape: {transformed_image.shape}")
pascal_voc: [x_min, y_min, x_max, y_max] in absolute pixel coordinates. (x_min, y_min) is the top-left corner, and (x_max, y_max) is the bottom-right corner.
albumentations: Similar to pascal_voc, but uses normalized coordinates: [normalized_x_min, normalized_y_min, normalized_x_max, normalized_y_max]. These are calculated as x_pixel / image_width and y_pixel / image_height.
coco: [x_min, y_min, bbox_width, bbox_height] in absolute pixel coordinates. (x_min, y_min) is the top-left corner.
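yolo: [normalized_x_center, normalized_y_center, normalized_bbox_width, normalized_bbox_height]. (x_center, y_center) is the center of the box. All four values are normalized: x_center and bbox_width are divided by the image width, y_center and bbox_height by the image height.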
import albumentations as A
import cv2
import numpy as np
# Example pipeline (kept the same as the original doc so it stays
# consistent with the images shown there).

# Bounding-box settings: boxes are supplied in 'coco' format and their
# labels arrive through the `class_labels` keyword argument.
bbox_settings = A.BboxParams(
    format='coco',
    label_fields=['class_labels'],
)

train_transform = A.Compose(
    [
        A.RandomCrop(width=450, height=450, p=1.0),  # example random crop, always applied
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
    ],
    bbox_params=bbox_settings,
)
이미지를 불러오고(예: RGB NumPy 배열로) 바운딩 박스 데이터를 준비합니다.
# Load the image.
image_path = "/path/to/your/image.jpg"
image = cv2.imread(image_path)
# cv2.imread signals an unreadable or missing file by returning None rather
# than raising, so fail here with a clear message instead of letting
# cvtColor raise an opaque OpenCV assertion error.
if image is None:
    raise FileNotFoundError(f"Could not read image at {image_path!r}")
# Convert from BGR channel order to RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Prepare bounding boxes (this example uses the 'coco' format).
# Each row is [x_min, y_min, bbox_width, bbox_height].
bboxes = np.array(
    [
        [23, 74, 295, 388],
        [377, 294, 252, 161],
        [333, 421, 49, 49],
    ],
    dtype=np.float32,
)

# Prepare labels, one per bounding box, under the name listed in label_fields.
class_labels = np.array(['dog', 'cat', 'sports ball'])
# Example with multiple label fields if defined in BboxParams:
# class_categories = np.array(['animal', 'animal', 'item'])
`label_fields`에 정의된 키워드 인수를 사용하여 이미지, 바운딩 박스(`bboxes`), 그리고 해당 라벨 리스트를 전달합니다.
# Run the pipeline defined earlier (label_fields=['class_labels']): the
# labels are passed under the keyword named in label_fields, next to the
# image and the bounding boxes.
augmented = train_transform(
    image=image,
    bboxes=bboxes,
    class_labels=class_labels,
)

# Pull each result out of the returned dict; the labels are read back
# under the same key they were passed in with.
transformed_image, transformed_bboxes, transformed_class_labels = (
    augmented[key] for key in ('image', 'bboxes', 'class_labels')
)
# With several label fields (e.g., label_fields=['class_labels', 'category_id'])
# passed like: transform(..., class_labels=..., category_id=...)
# each one is read the same way: transformed_category_ids = augmented['category_id']
A.Normalize와 A.ToTensorV2를 적용하기 전의 출력을 시각화합니다.
import matplotlib.pyplot as plt
import random
import numpy as np # Ensure numpy is imported
import cv2 # Ensure cv2 is imported
# Helpers for drawing bounding boxes and their labels onto a copy of an image.
def _bbox_to_pixel_corners(bbox, bbox_format, img_w, img_h):
    """Convert one box to integer pixel (x_min, y_min, x_max, y_max)."""
    a, b, c, d = (float(v) for v in bbox[:4])
    if bbox_format == "coco":
        # [x_min, y_min, width, height] -> corners
        c, d = a + c, b + d
    elif bbox_format == "yolo":
        # [x_center, y_center, width, height] -> corners (still normalized)
        half_w, half_h = c / 2, d / 2
        a, b, c, d = a - half_w, b - half_h, a + half_w, b + half_h
    if bbox_format in ("albumentations", "yolo"):
        # Normalized coordinates -> absolute pixels
        a, c = a * img_w, c * img_w
        b, d = b * img_h, d * img_h
    return int(a), int(b), int(c), int(d)


def draw_bboxes(image_np, bboxes, labels, class_name_map=None, color=(0, 255, 0), thickness=2,
                bbox_format="pascal_voc"):
    """Return a copy of ``image_np`` with every bounding box and its label drawn.

    Args:
        image_np: Image as a NumPy array. Height and width are taken from
            ``image_np.shape[:2]``; the input array is not modified.
        bboxes: Array or sequence of boxes. Only the first four values of each
            box are used, interpreted according to ``bbox_format``.
        labels: One label per box. If the number of labels does not match the
            number of boxes, every box is drawn with a ``'?'`` placeholder.
        class_name_map: Optional mapping from label to display name. Labels
            missing from the mapping are shown as ``str(label)``.
        color: Color passed to OpenCV for rectangles and text.
        thickness: Rectangle line thickness.
        bbox_format: How each box is encoded: ``'pascal_voc'`` (default,
            pixel ``[x_min, y_min, x_max, y_max]``), ``'albumentations'``
            (the same, normalized), ``'coco'`` (pixel
            ``[x_min, y_min, width, height]``) or ``'yolo'`` (normalized
            ``[x_center, y_center, width, height]``).

    Returns:
        A new image array with the annotations drawn on it.

    Raises:
        ValueError: If ``bbox_format`` is not one of the supported names.
    """
    if bbox_format not in ("pascal_voc", "albumentations", "coco", "yolo"):
        raise ValueError(f"Unsupported bbox_format: {bbox_format!r}")

    img_res = image_np.copy()
    # Nothing to draw: return the untouched copy.
    if bboxes is None or len(bboxes) == 0:
        return img_res

    # Lists and tuples of labels are accepted as-is. A length mismatch would
    # make zip() silently drop boxes, so fall back to placeholders instead.
    if labels is None or len(labels) != len(bboxes):
        print("Warning: bbox and label length mismatch, cannot draw labels.")
        labels = ['?'] * len(bboxes)
    elif isinstance(labels, np.ndarray) and labels.dtype == object:
        print("Warning: labels contain object dtype, converting to string.")
        labels = labels.astype(str)

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    font_thickness = 1
    img_h, img_w = image_np.shape[:2]

    for bbox, label in zip(bboxes, labels):
        try:
            if len(bbox) < 4:
                print(f"Warning: Skipping invalid bbox (fewer than 4 coords): {bbox}")
                continue
            x_min, y_min, x_max, y_max = _bbox_to_pixel_corners(bbox, bbox_format, img_w, img_h)
        except (ValueError, TypeError, OverflowError) as e:
            # Non-numeric, NaN or infinite coordinates: skip just this box.
            print(f"Warning: Could not convert bbox coords to int: {bbox}, Error: {e}")
            continue

        cv2.rectangle(img_res, (x_min, y_min), (x_max, y_max), color, thickness)
        label_name = str(label) if class_name_map is None else class_name_map.get(label, str(label))
        # Put the text just above the box; if there is not enough room above
        # it, put it just inside the top edge instead.
        (text_width, text_height), baseline = cv2.getTextSize(label_name, font, font_scale, font_thickness)
        text_y = y_min - baseline if y_min - baseline > text_height else y_min + text_height
        cv2.putText(img_res, label_name, (x_min, text_y), font, font_scale, color, font_thickness)
    return img_res
def _bboxes_to_pascal_voc(bboxes, bbox_format, img_h, img_w):
    """Convert boxes to a float array of pixel [x_min, y_min, x_max, y_max] rows."""
    if bboxes is None or len(bboxes) == 0:
        return np.zeros((0, 4), dtype=np.float32)
    # Keep only the four coordinates of each box; any extra fields are dropped.
    boxes = np.array([list(box[:4]) for box in bboxes], dtype=np.float32)
    if bbox_format == "coco":
        # [x_min, y_min, width, height] -> corners
        boxes[:, 2:4] += boxes[:, 0:2]
    elif bbox_format == "yolo":
        # [x_center, y_center, width, height] -> corners (still normalized)
        half_sizes = boxes[:, 2:4] / 2
        centers = boxes[:, 0:2].copy()
        boxes[:, 0:2] = centers - half_sizes
        boxes[:, 2:4] = centers + half_sizes
    if bbox_format in ("albumentations", "yolo"):
        # Normalized coordinates -> absolute pixels
        boxes[:, [0, 2]] *= img_w
        boxes[:, [1, 3]] *= img_h
    return boxes


def visualize_bbox_augmentations(image, bboxes, labels, transform, samples=5):
    """Show the original image next to ``samples`` augmented versions, with boxes drawn.

    Args:
        image: Image as a NumPy array.
        bboxes: Bounding boxes in the format declared by the pipeline's
            ``A.BboxParams``.
        labels: One label per box; passed to the pipeline under every name
            listed in ``label_fields``.
        transform: Augmentation pipeline. If it is an ``A.Compose``, a copy
            without ``Normalize``/``ToTensorV2`` is used so the output stays
            displayable; otherwise it is used unchanged.
        samples: Number of augmented versions to draw.

    Returns:
        None. Prints a message and returns early when the pipeline has no
        bounding-box parameters; otherwise displays a matplotlib figure.
    """
    # Prepare the visualization pipeline (strip Normalize / ToTensorV2).
    if isinstance(transform, A.Compose):
        # A.ToTensorV2 may not be available (e.g. when the optional PyTorch
        # integration is missing), so look both classes up defensively.
        strip_types = tuple(
            cls
            for cls in (getattr(A, name, None) for name in ("Normalize", "ToTensorV2"))
            if cls is not None
        )
        vis_transform_list = [t for t in transform if not isinstance(t, strip_types)]
        # Recreate the Compose with the original bbox_params, if any.
        source_processor = transform.processors.get('bboxes')
        bbox_params = source_processor.params if source_processor else None
        vis_transform = A.Compose(vis_transform_list, bbox_params=bbox_params)
    else:
        print("Cannot strip Normalize/ToTensor: transform is not an A.Compose instance.")
        vis_transform = transform  # Use the original transform

    # getattr guards against None and objects that expose no `processors`.
    bbox_processor = getattr(vis_transform, "processors", {}).get('bboxes')
    if bbox_processor is None:
        print("Cannot visualize: Pipeline needs A.BboxParams for visualization.")
        return

    # These do not change between samples, so resolve them once up front.
    bbox_format = bbox_processor.params.format
    label_fields = list(bbox_processor.params.label_fields or [])
    label_args = {field: labels for field in label_fields}

    samples = max(int(samples), 0)
    # squeeze=False keeps the axes indexable even when samples == 0.
    figure, axes = plt.subplots(1, samples + 1, figsize=(15, 5), squeeze=False)
    axes = axes[0]

    # Draw the original. Boxes are converted to pixel corners first because
    # that is the layout draw_bboxes expects by default.
    orig_h, orig_w = image.shape[:2]
    original_drawn = draw_bboxes(
        image,
        _bboxes_to_pascal_voc(bboxes, bbox_format, orig_h, orig_w),
        np.asarray(labels),
    )
    axes[0].imshow(original_drawn)
    axes[0].set_title("Original")
    axes[0].axis("off")

    # Draw the augmented samples.
    for i in range(samples):
        axis = axes[i + 1]
        try:
            augmented = vis_transform(image=image, bboxes=bboxes, **label_args)
            aug_image = augmented['image']
            aug_h, aug_w = aug_image.shape[:2]
            aug_bboxes = _bboxes_to_pascal_voc(augmented['bboxes'], bbox_format, aug_h, aug_w)
            if label_fields:
                aug_labels = np.asarray(augmented[label_fields[0]])
            else:
                aug_labels = np.array(['?'] * len(aug_bboxes))  # Placeholder if no labels
            axis.imshow(draw_bboxes(aug_image, aug_bboxes, aug_labels))
            axis.set_title(f"Augmented {i+1}")
        except Exception as e:
            # Deliberately broad: one failing sample should not abort the
            # whole figure, so report it and show the original instead.
            print(f"Error during augmentation sample {i+1}: {e}")
            axis.imshow(image)
            axis.set_title(f"Aug Error {i+1}")
        finally:
            axis.axis("off")
    plt.tight_layout()
    plt.show()
# --- Example Usage --- #
# Assuming 'image', 'bboxes', 'class_labels', and 'train_transform' are defined as in Step 3/4
# Load a sample image and annotations
# image = cv2.imread('your_image.jpg')
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# bboxes = np.array([[...], [...]], dtype=np.float32) # NumPy array, in 'coco' format for this example
# class_labels = np.array(['label1', 'label2']) # NumPy array, one label per bounding box
# Define the transform (must include bbox_params with correct format and label_fields)
# train_transform = A.Compose([
# A.RandomCrop(width=450, height=450, p=1.0),
# A.HorizontalFlip(p=0.5),
# A.RandomBrightnessContrast(p=0.2),
# # A.Normalize(...), # Include if used, will be stripped by visualize func
# # A.ToTensorV2(), # Include if used, will be stripped by visualize func
# ], bbox_params=A.BboxParams(format='coco', label_fields=['class_labels']))
# Visualize
# visualize_bbox_augmentations(image, bboxes, class_labels, train_transform, samples=4)