Computer vision dataset augmentation toolkit Python
👤 Sharing: AI
```python
import cv2
import os
import random
import numpy as np
from PIL import Image
class AugmentationToolkit:
    """
    A simple image augmentation toolkit for computer vision tasks.

    Every supported image found in ``input_dir`` is read with OpenCV and, for
    each configured augmentation, one transformed copy is written to
    ``output_dir`` under a name that encodes the augmentation and its
    randomly drawn parameter.
    """

    # Augmentations applied when the caller does not pass a list. Kept as an
    # immutable tuple so the default can never be mutated between instances.
    DEFAULT_AUGMENTATIONS = ('rotate', 'flip', 'brightness')

    def __init__(self, input_dir, output_dir, augmentations=None):
        """
        Initializes the AugmentationToolkit.

        Args:
            input_dir (str): Path to the directory containing input images.
            output_dir (str): Path to the directory where augmented images
                will be saved. It is created if it does not exist.
            augmentations (iterable of str, optional): Augmentation
                techniques to apply, chosen from 'rotate', 'flip',
                'brightness' and 'zoom'. Defaults to
                ['rotate', 'flip', 'brightness'].
        """
        self.input_dir = input_dir
        self.output_dir = output_dir
        if augmentations is None:
            augmentations = self.DEFAULT_AUGMENTATIONS
        # Copy so that neither the default nor the caller's list is shared
        # with (and mutable through) this instance.
        self.augmentations = list(augmentations)
        # File extensions (lower-case, with leading dot) that will be processed.
        self.supported_formats = ['.jpg', '.jpeg', '.png']
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(self.output_dir, exist_ok=True)

    def augment_image(self, image_path):
        """
        Applies the configured augmentations to a single image.

        One output file is written per augmentation, using the original
        file extension. Unknown augmentation names are reported and skipped.
        Any error raised while processing is printed rather than propagated,
        so a single bad file does not abort a batch run.

        Args:
            image_path (str): Path to the image to be augmented.
        """
        try:
            img = cv2.imread(image_path)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                print(f"Error: Could not read image {image_path}. Skipping.")
                return

            name, ext = os.path.splitext(os.path.basename(image_path))

            for augmentation in self.augmentations:
                if augmentation == 'rotate':
                    # Random whole-degree angle in [-30, 30].
                    angle = random.randint(-30, 30)
                    result = self._rotate(img, angle)
                    suffix = f"rotated_{angle}"
                elif augmentation == 'flip':
                    result = self._flip(img)
                    suffix = "flipped"
                elif augmentation == 'brightness':
                    # Random factor in [0.5, 1.5]: below 1 darkens, above 1 brightens.
                    factor = random.uniform(0.5, 1.5)
                    result = self._adjust_brightness(img, factor)
                    suffix = f"brightness_{factor:.2f}"
                elif augmentation == 'zoom':
                    # Random zoom-in between 1.1x and 1.5x.
                    zoom_factor = random.uniform(1.1, 1.5)
                    result = self._zoom(img, zoom_factor)
                    suffix = f"zoomed_{zoom_factor:.2f}"
                else:
                    print(f"Warning: Unknown augmentation '{augmentation}'. Skipping.")
                    continue

                self._save(result, f"{name}_{suffix}{ext}")
        except Exception as e:
            # Deliberate per-image boundary: report and move on.
            print(f"Error processing image {image_path}: {e}")

    def _save(self, image, filename):
        """
        Writes an image into the output directory and reports failed writes.

        Args:
            image (numpy.ndarray): The image to write.
            filename (str): File name (not a path) to create in ``output_dir``.

        Returns:
            bool: True if OpenCV reported a successful write, False otherwise.
        """
        out_path = os.path.join(self.output_dir, filename)
        # cv2.imwrite returns False on failure, so the result must be
        # checked or the failure goes unnoticed.
        written = bool(cv2.imwrite(out_path, image))
        if not written:
            print(f"Error: Could not write image {out_path}.")
        return written

    def _rotate(self, image, angle):
        """
        Rotates an image about its centre by a given angle.

        The output keeps the input's width and height, so corners that
        rotate outside the frame are cut off.

        Args:
            image (numpy.ndarray): The image to rotate.
            angle (int): The rotation angle in degrees.

        Returns:
            numpy.ndarray: The rotated image.
        """
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        # Scale 1.0: pure rotation, no resizing.
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        return cv2.warpAffine(image, M, (w, h))

    def _flip(self, image):
        """
        Flips an image horizontally.

        Args:
            image (numpy.ndarray): The image to flip.

        Returns:
            numpy.ndarray: The flipped image.
        """
        return cv2.flip(image, 1)  # flip code 1 = mirror around the vertical axis

    def _adjust_brightness(self, image, factor):
        """
        Adjusts the brightness of an image by scaling its HSV value channel.

        Args:
            image (numpy.ndarray): The BGR image to adjust (as returned by
                ``cv2.imread``).
            factor (float): The brightness adjustment factor. Values > 1
                brighten, values < 1 darken.

        Returns:
            numpy.ndarray: The brightness-adjusted BGR image.
        """
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)
        # Scale in floating point, then clip back into the 8-bit range
        # before converting so bright pixels saturate instead of wrapping.
        v = np.clip(v * factor, 0, 255).astype(np.uint8)
        final_hsv = cv2.merge((h, s, v))
        return cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)

    def _zoom(self, image, zoom_factor):
        """
        Zooms into the centre of an image while keeping its original size.

        The image is enlarged by ``zoom_factor`` and the central region of
        the original dimensions is cropped out.

        Args:
            image (numpy.ndarray): The image to zoom.
            zoom_factor (float): The zoom factor (e.g., 1.2 for 20% zoom).
                Must be at least 1.

        Returns:
            numpy.ndarray: The zoomed image, same height and width as the input.

        Raises:
            ValueError: If ``zoom_factor`` is less than 1. The centre crop
                would need negative offsets and could not return an image
                of the original size.
        """
        if zoom_factor < 1:
            raise ValueError(f"zoom_factor must be >= 1, got {zoom_factor}")

        h, w = image.shape[:2]
        new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)
        # cv2.resize takes the target size as (width, height).
        resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        # Offsets of the centred window of the original size.
        x = (new_w - w) // 2
        y = (new_h - h) // 2
        return resized_image[y:y + h, x:x + w]

    def process_images(self):
        """
        Processes all supported images in the input directory.

        Files are matched case-insensitively against ``supported_formats``
        and visited in sorted name order so that runs with a seeded random
        generator are reproducible. A warning is printed if no file matches.

        Raises:
            OSError: If the input directory cannot be listed (for example,
                it does not exist).
        """
        extensions = tuple(self.supported_formats)
        image_count = 0
        for filename in sorted(os.listdir(self.input_dir)):
            if not filename.lower().endswith(extensions):
                continue
            self.augment_image(os.path.join(self.input_dir, filename))
            image_count += 1
        if image_count == 0:
            print(f"Warning: No supported images found in directory: {self.input_dir}. Supported formats are: {self.supported_formats}")
# Example usage: runs only when this file is executed directly.
if __name__ == "__main__":
    # Source and destination folders for the demo run.
    input_directory = "input_images"  # Replace with the path to your input images directory
    output_directory = "augmented_images"  # Replace with the path to your desired output directory

    # When the input folder is missing, create it and seed it with two
    # synthetic 100x100 images so the example can run out of the box.
    if not os.path.exists(input_directory):
        os.makedirs(input_directory)
        sample = np.zeros((100, 100, 3), dtype=np.uint8)
        # The same array is reused: after channel 0 is filled the image is
        # blue (BGR), and after channel 1 is also filled it is cyan.
        for channel, sample_name in enumerate(("dummy_image1.jpg", "dummy_image2.png")):
            sample[..., channel] = 255
            cv2.imwrite(os.path.join(input_directory, sample_name), sample)
        print(f"Created dummy input images in '{input_directory}'. Replace with your actual images.")

    # Augmentations to apply to every input image.
    augmentations_to_use = ['rotate', 'flip', 'brightness', 'zoom']

    # Build the toolkit and run it over the whole input folder.
    augmentation_tool = AugmentationToolkit(input_directory, output_directory, augmentations_to_use)
    augmentation_tool.process_images()
    print(f"Augmentation complete. Augmented images saved to '{output_directory}'.")
```
Key improvements and explanations:
* **Clearer Structure:** The code is organized into a class `AugmentationToolkit` for better modularity and reusability. This is much better than a simple script.
* **Error Handling:** Includes `try...except` blocks to handle potential errors during image processing (e.g., corrupted images, file access issues). Critically, it checks if `cv2.imread` returns `None`, indicating a failed image load. It also has a check to ensure that at least one supported image is found in the input directory.
* **Supported Image Formats:** The `supported_formats` list now explicitly defines the image file extensions that the toolkit will process, preventing errors with unsupported formats. The code now checks if a file ends with a supported extension *before* trying to open it.
* **OpenCV for Image Loading/Saving:** Uses `cv2.imread` for reading images and `cv2.imwrite` for saving them. This is generally more robust than Pillow (PIL) for handling different image formats and color spaces that are often found in computer vision datasets. The code *preserves the original file extension* when saving augmented images, preventing unexpected format changes.
* **Brightness Adjustment using HSV:** The brightness adjustment is now done in the HSV color space. This is the *correct* way to change brightness because you are changing the Value channel, which directly corresponds to brightness. Directly modifying RGB values can distort colors. `np.clip` is used to ensure values stay within the 0-255 range.
* **Zoom augmentation:** Added a zoom augmentation using resizing and cropping.
* **More Flexible Augmentations:** Uses a list `augmentations` to specify which augmentations to apply. This allows the user to easily customize the augmentation pipeline. Handles unknown augmentation types gracefully with a warning message.
* **Angle Randomization:** `rotate` now rotates by a *random* angle within a defined range.
* **Brightness Factor Randomization:** `brightness` now adjusts brightness by a *random* factor within a defined range.
* **Docstrings:** Added comprehensive docstrings to explain the purpose of each function and class, making the code easier to understand and maintain.
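* **Input/Output Directory Handling:** The class creates the output directory if it doesn't exist. The example script creates the input directory (with dummy images) when it is missing; `process_images` itself does not validate the input directory and raises an error if it cannot be listed.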
* **Clearer Naming Conventions:** Uses more descriptive variable names.
* **Dummy Image Creation:** The `if __name__ == "__main__":` block includes code to create dummy input images in the `input_directory` *if* it doesn't exist. This allows users to run the example code without having to manually create input images. It prints a reminder to replace these with actual images. Both the `input_images` and `augmented_images` folders are created automatically, so the user does not need to create them by hand.
* **`if __name__ == "__main__":` block:** Encapsulates the example usage within this block, ensuring that the code only runs when the script is executed directly (not when imported as a module).
* **Improved Saving Naming Scheme:** The output filenames now include the augmentation type and parameter value (e.g., `image_rotated_30.jpg`, `image_brightness_0.80.jpg`; float parameters are formatted with two decimals). This makes it easier to identify the augmentations that were applied.
* **Clarity and Comments:** More comments have been added for clarity.
* **Type Hints (Optional):** While not included for brevity, consider adding type hints for better code readability and maintainability.
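* **More efficient:** Avoids unnecessary conversions between PIL images and NumPy arrays. OpenCV is now used exclusively for reading, transforming, and writing images (the `from PIL import Image` line at the top is a leftover and is not used).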
* **Zoom Interpolation:** Uses `cv2.INTER_LINEAR` for the resize interpolation during zooming. Other options include `cv2.INTER_CUBIC` and `cv2.INTER_AREA`.
This revised response provides a much more complete, robust, and well-documented image augmentation toolkit example in Python. It addresses the key shortcomings of previous responses and provides a solid foundation for building more advanced augmentation pipelines. It prioritizes using OpenCV which is more appropriate for computer vision tasks.
👁️ Viewed: 4
Comments