AI Image Captioning Tool — Python, AI

👤 Sharing: AI
```python
import torch
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
from PIL import Image

def generate_image_caption(
    image_path,
    model_name="nlpconnect/vit-gpt2-image-captioning",
    *,
    max_length=16,
):
    """
    Generates a caption for an image using a pre-trained Vision Encoder Decoder model.

    Args:
        image_path (str): The path to the image file.
        model_name (str, optional): The name of the pre-trained model to use.
                                     Defaults to "nlpconnect/vit-gpt2-image-captioning".
        max_length (int, optional): Keyword-only. Forwarded to ``model.generate``
                                     as ``max_length``. Defaults to 16.

    Returns:
        str: The generated caption with surrounding whitespace removed.
             Returns None if there is an error (the error is printed).
    """
    try:
        # Read the image first so an unreadable path fails before the
        # (much more expensive) model load is attempted.
        with Image.open(image_path) as source_image:
            # convert() hands back a separate, fully loaded copy, so the
            # underlying file can be closed as soon as this block exits.
            image = source_image.convert("RGB")

        # Load the model and its matching processor / tokenizer
        model = VisionEncoderDecoderModel.from_pretrained(model_name)
        image_processor = ViTImageProcessor.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Move the model to the GPU if available
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        model.eval()  # Set the model to evaluation mode

        # Preprocess the image into a batch of one on the model's device
        pixel_values = image_processor(images=[image], return_tensors="pt").pixel_values
        pixel_values = pixel_values.to(device)

        # Generate the caption without tracking gradients
        with torch.no_grad():
            generated_ids = model.generate(pixel_values, max_length=max_length)

        # A single image was submitted, so the first decoded entry is the caption.
        caption = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return caption.strip()

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"Error: {e}")
        return None


if __name__ == '__main__':
    # Demo entry point: point `image_file` at a real image before running.
    image_file = 'your_image.jpg'
    caption = generate_image_caption(image_file)

    # A falsy result (None or an empty caption) is reported as a failure.
    print(
        f"Caption for {image_file}: {caption}"
        if caption
        else f"Failed to generate a caption for {image_file}"
    )
```
👁️ Viewed: 21

Comments