AI Image Captioning Tool Python, AI
👤 Sharing: AI
```python
import torch
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
from PIL import Image
def generate_image_caption(image_path, model_name="nlpconnect/vit-gpt2-image-captioning", max_length=16):
    """
    Generates a caption for an image using a pre-trained Vision Encoder Decoder model.

    Args:
        image_path (str): The path to the image file.
        model_name (str, optional): The name of the pre-trained model to use.
            Defaults to "nlpconnect/vit-gpt2-image-captioning".
        max_length (int, optional): Value forwarded to ``model.generate`` as
            ``max_length``. Defaults to 16.

    Returns:
        str: The generated caption for the image, with surrounding whitespace
        stripped. Returns None if there is an error (the error is printed).
    """
    try:
        # Read the image before loading the model so that an invalid path or
        # an unreadable file fails fast, without paying for the model load.
        # The context manager closes the file handle; convert() is called
        # inside it so the pixel data is read while the file is still open.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        # Load the model, its image processor and its tokenizer
        model = VisionEncoderDecoderModel.from_pretrained(model_name)
        image_processor = ViTImageProcessor.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Move the model to the GPU if available
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        model.eval()  # Set the model to evaluation mode

        # Preprocess the image into a tensor on the same device as the model
        pixel_values = image_processor(images=[image], return_tensors="pt").pixel_values
        pixel_values = pixel_values.to(device)

        # Generate the caption; gradients are not needed for inference
        with torch.no_grad():
            generated_ids = model.generate(pixel_values, max_length=max_length)

        captions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        # Only one image was passed in, so the first decoded entry is the caption
        return captions[0].strip()
    except Exception as e:
        # Deliberately broad: the documented contract is "return None on any
        # error", so the failure is reported and then signalled with None.
        print(f"Error: {e}")
        return None
if __name__ == '__main__':
    import sys

    # Example usage: pass the path to your image as the first command-line
    # argument. Without an argument the placeholder 'your_image.jpg' is used,
    # so replace it with the actual path to your image if you run it that way.
    image_file = sys.argv[1] if len(sys.argv) > 1 else 'your_image.jpg'
    caption = generate_image_caption(image_file)
    # generate_image_caption signals failure with None; an empty string is
    # still a returned caption, so test for None explicitly, not truthiness.
    if caption is not None:
        print(f"Caption for {image_file}: {caption}")
    else:
        print(f"Failed to generate a caption for {image_file}")
```
👁️ Viewed: 21
Comments