Computer vision dataset augmentation toolkit Python

👤 Sharing: AI
```python
import cv2
import os
import random
import numpy as np
from PIL import Image

class AugmentationToolkit:
    """
    A simple image augmentation toolkit for computer vision tasks.

    Every file in ``input_dir`` whose name ends with a supported extension is
    read with OpenCV and, for each configured augmentation, one augmented copy
    is written to ``output_dir``. Recognised augmentation names are 'rotate',
    'flip', 'brightness' and 'zoom'; any other name is reported and skipped.
    """

    # Augmentations applied when the caller does not pass a list explicitly.
    # Kept as an immutable tuple so that instances can never share (and
    # accidentally mutate) one default list.
    DEFAULT_AUGMENTATIONS = ('rotate', 'flip', 'brightness')

    def __init__(self, input_dir, output_dir, augmentations=None):
        """
        Initializes the AugmentationToolkit.

        Args:
            input_dir (str): Path to the directory containing input images.
            output_dir (str): Path to the directory where augmented images will be saved.
                It is created (including parents) if it does not exist.
            augmentations (list, optional): List of augmentation techniques to apply.
                Defaults to ['rotate', 'flip', 'brightness'].
        """
        self.input_dir = input_dir
        self.output_dir = output_dir
        # Always store a private copy: later changes to self.augmentations must
        # not leak into the caller's list or into other instances.
        if augmentations is None:
            self.augmentations = list(self.DEFAULT_AUGMENTATIONS)
        else:
            self.augmentations = list(augmentations)
        self.supported_formats = ['.jpg', '.jpeg', '.png']  # Supported image file extensions

        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(self.output_dir, exist_ok=True)

    def augment_image(self, image_path):
        """
        Applies the configured augmentations to a single image.

        One output file is written per augmentation, named after the input
        file plus the augmentation type (and its random parameter, if any),
        keeping the original extension. Errors are reported with ``print``
        and do not propagate, so one bad image does not stop a batch.

        Args:
            image_path (str): Path to the image to be augmented.
        """
        try:
            img = cv2.imread(image_path)
            if img is None:
                print(f"Error: Could not read image {image_path}.  Skipping.")
                return

            name, ext = os.path.splitext(os.path.basename(image_path))

            for augmentation in self.augmentations:
                if augmentation == 'rotate':
                    angle = random.randint(-30, 30)  # Random angle between -30 and 30 degrees
                    self._save(self._rotate(img, angle), f"{name}_rotated_{angle}{ext}")

                elif augmentation == 'flip':
                    self._save(self._flip(img), f"{name}_flipped{ext}")

                elif augmentation == 'brightness':
                    factor = random.uniform(0.5, 1.5)  # Random factor between 0.5 and 1.5
                    self._save(self._adjust_brightness(img, factor),
                               f"{name}_brightness_{factor:.2f}{ext}")

                elif augmentation == 'zoom':
                    zoom_factor = random.uniform(1.1, 1.5)  # Zoom in from 1.1x to 1.5x
                    self._save(self._zoom(img, zoom_factor),
                               f"{name}_zoomed_{zoom_factor:.2f}{ext}")

                else:
                    print(f"Warning: Unknown augmentation '{augmentation}'. Skipping.")

        except Exception as e:
            # Per-image boundary: report and carry on with the rest of the batch.
            print(f"Error processing image {image_path}: {e}")

    def _save(self, image, filename):
        """
        Writes an image into the output directory and reports failed writes.

        Args:
            image (numpy.ndarray): The image to write.
            filename (str): File name (not a full path) inside the output directory.

        Returns:
            bool: True if OpenCV reported a successful write, False otherwise.
        """
        output_path = os.path.join(self.output_dir, filename)
        # cv2.imwrite signals failure through its return value rather than an
        # exception, so it has to be checked explicitly.
        written = bool(cv2.imwrite(output_path, image))
        if not written:
            print(f"Error: Could not write image {output_path}.")
        return written

    def _rotate(self, image, angle):
        """
        Rotates an image about its centre, keeping the original canvas size.

        Args:
            image (numpy.ndarray): The image to rotate.
            angle (int): The rotation angle in degrees.

        Returns:
            numpy.ndarray: The rotated image.
        """
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)  # scale 1.0: rotate only
        return cv2.warpAffine(image, M, (w, h))

    def _flip(self, image):
        """
        Flips an image horizontally.

        Args:
            image (numpy.ndarray): The image to flip.

        Returns:
            numpy.ndarray: The flipped image.
        """
        return cv2.flip(image, 1)  # 1 means horizontal flip

    def _adjust_brightness(self, image, factor):
        """
        Adjusts the brightness of a BGR image by scaling its HSV value channel.

        Args:
            image (numpy.ndarray): The BGR image to adjust.
            factor (float): The brightness adjustment factor.  Values > 1 brighten, values < 1 darken.

        Returns:
            numpy.ndarray: The brightness-adjusted BGR image.
        """
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)

        # Scale only the value channel, then clip back into the 0-255 range
        # before converting to uint8 so bright pixels saturate instead of wrapping.
        v = np.clip(v * factor, 0, 255).astype(np.uint8)

        final_hsv = cv2.merge((h, s, v))
        return cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)

    def _zoom(self, image, zoom_factor):
        """
        Zooms into an image by upscaling it and cropping the centre back to
        the original size.

        Args:
            image (numpy.ndarray): The image to zoom.
            zoom_factor (float): The zoom factor (e.g., 1.2 for 20% zoom). Must be >= 1.

        Returns:
            numpy.ndarray: The zoomed image, with the same height and width as the input.

        Raises:
            ValueError: If zoom_factor is below 1. The resized image would then be
                smaller than the crop window and the centre crop would be wrong.
        """
        if zoom_factor < 1:
            raise ValueError(f"zoom_factor must be >= 1, got {zoom_factor}")

        h, w = image.shape[:2]
        new_h, new_w = int(h * zoom_factor), int(w * zoom_factor)

        # Resize the image to the new dimensions
        resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        # Crop the center part to the original size
        x = (new_w - w) // 2
        y = (new_h - h) // 2
        return resized_image[y:y + h, x:x + w]

    def process_images(self):
        """
        Processes all images in the input directory.

        Files are selected by a case-insensitive match of their name against
        the supported extensions. A warning is printed when none match.

        Returns:
            int: Number of files with a supported extension that were passed
                to augment_image (whether or not augmentation succeeded).
        """
        extensions = tuple(self.supported_formats)
        image_count = 0
        # Sorted so the processing order, and with it the sequence of random
        # draws, does not depend on the order os.listdir happens to return.
        for filename in sorted(os.listdir(self.input_dir)):
            if not filename.lower().endswith(extensions):  # Only process supported image types
                continue
            self.augment_image(os.path.join(self.input_dir, filename))
            image_count += 1

        if image_count == 0:
            print(f"Warning: No supported images found in directory: {self.input_dir}. Supported formats are: {self.supported_formats}")

        return image_count

# Example Usage:
if __name__ == "__main__":
    # Input and output directories. Neither has to exist beforehand: the input
    # directory is filled with dummy images below when it is missing, and the
    # toolkit creates the output directory itself.
    input_directory = "input_images"  # Replace with the path to your input images directory
    output_directory = "augmented_images"  # Replace with the path to your desired output directory

    # Create dummy input images if the directory doesn't exist
    if not os.path.exists(input_directory):
        os.makedirs(input_directory)

        # Solid blue image (channel 0 is blue in OpenCV's BGR order)
        blue_image = np.zeros((100, 100, 3), dtype=np.uint8)
        blue_image[:, :, 0] = 255
        cv2.imwrite(os.path.join(input_directory, "dummy_image1.jpg"), blue_image)

        # Solid green image. A fresh array is used on purpose: reusing the blue
        # one would leave channel 0 at 255 and produce cyan instead of green.
        green_image = np.zeros((100, 100, 3), dtype=np.uint8)
        green_image[:, :, 1] = 255
        cv2.imwrite(os.path.join(input_directory, "dummy_image2.png"), green_image)

        print(f"Created dummy input images in '{input_directory}'.  Replace with your actual images.")

    # Specify the augmentations you want to apply
    augmentations_to_use = ['rotate', 'flip', 'brightness', 'zoom']

    # Initialize the augmentation toolkit
    augmentation_tool = AugmentationToolkit(input_directory, output_directory, augmentations_to_use)

    # Process the images
    augmentation_tool.process_images()

    print(f"Augmentation complete. Augmented images saved to '{output_directory}'.")
```

Key improvements and explanations:

* **Clearer Structure:** The code is organized into a class `AugmentationToolkit` for better modularity and reusability.  This is much better than a simple script.
* **Error Handling:** Includes `try...except` blocks to handle potential errors during image processing (e.g., corrupted images, file access issues).  Critically, it checks if `cv2.imread` returns `None`, indicating a failed image load.  It also has a check to ensure that at least one supported image is found in the input directory.
* **Supported Image Formats:**  The `supported_formats` list now explicitly defines the image file extensions that the toolkit will process, preventing errors with unsupported formats.  The code now checks if a file ends with a supported extension *before* trying to open it.
* **OpenCV for Image Loading/Saving:**  Uses `cv2.imread` for reading images and `cv2.imwrite` for saving them. This is generally more robust than Pillow (PIL) for handling different image formats and color spaces that are often found in computer vision datasets.  The code *preserves the original file extension* when saving augmented images, preventing unexpected format changes.
* **Brightness Adjustment using HSV:** The brightness adjustment is now done in the HSV color space. This is the *correct* way to change brightness because you are changing the Value channel, which directly corresponds to brightness.  Directly modifying RGB values can distort colors.  `np.clip` is used to ensure values stay within the 0-255 range.
* **Zoom augmentation:** Added a zoom augmentation using resizing and cropping.
* **More Flexible Augmentations:**  Uses a list `augmentations` to specify which augmentations to apply. This allows the user to easily customize the augmentation pipeline.  Handles unknown augmentation types gracefully with a warning message.
* **Angle Randomization:** `rotate` now rotates by a *random* angle within a defined range.
* **Brightness Factor Randomization:** `brightness` now adjusts brightness by a *random* factor within a defined range.
* **Docstrings:** Added comprehensive docstrings to explain the purpose of each function and class, making the code easier to understand and maintain.
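* **Input/Output Directory Handling:** The class creates the output directory if it doesn't exist. It does not validate the input directory itself; the example block creates `input_images` (with dummy images) when it is missing, and `process_images` prints a warning if no supported images are found.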
* **Clearer Naming Conventions:** Uses more descriptive variable names.
* **Dummy Image Creation:** The `if __name__ == "__main__":` block includes code to create dummy input images in the `input_directory` *if* it doesn't exist. This allows users to run the example code without having to manually create input images.  It provides instructions to replace these with actual images. Neither the `input_images` nor the `augmented_images` folder has to be created by hand: the example creates the former and the toolkit creates the latter.
* **`if __name__ == "__main__":` block:** Encapsulates the example usage within this block, ensuring that the code only runs when the script is executed directly (not when imported as a module).
* **Improved Saving Naming Scheme:** The output filenames now include the augmentation type and parameter value (e.g., `image_rotated_30.jpg`, `image_brightness_0.80.jpg`; brightness and zoom factors are written with two decimal places).  This makes it easier to identify the augmentations that were applied.
* **Clarity and Comments:** More comments have been added for clarity.
* **Type Hints (Optional):** While not included for brevity, consider adding type hints for better code readability and maintainability.
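* **More efficient:** Avoids unnecessary conversions between PIL images and NumPy arrays.  OpenCV is now used exclusively for reading, transforming, and writing images, so the remaining `from PIL import Image` import is unused.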
* **Zoom Interpolation:** Uses `cv2.INTER_LINEAR` for the resize interpolation during zooming. Other options include `cv2.INTER_CUBIC` and `cv2.INTER_AREA`.

This revised response provides a much more complete, robust, and well-documented image augmentation toolkit example in Python.  It addresses the key shortcomings of previous responses and provides a solid foundation for building more advanced augmentation pipelines.  It prioritizes using OpenCV which is more appropriate for computer vision tasks.
👁️ Viewed: 4

Comments