AI-powered Auto Captioning Python, AI
👤 Sharing: AI
```python
import os
import tempfile

import moviepy.editor as mp
import speech_recognition as sr
from pydub import AudioSegment
def auto_caption(video_path, output_srt_path, language="en-US"):
    """
    Automatically generates subtitles for a video using speech recognition.

    The audio track is extracted to a WAV file, cut into 60-second chunks,
    and each chunk is sent to Google Speech Recognition. Every successfully
    transcribed chunk becomes one SRT cue spanning that chunk's duration.
    Chunks that cannot be transcribed are reported on stdout and left out of
    the SRT file.

    Errors are not raised to the caller: any exception is caught, reported
    with ``print`` and the function returns ``None``.

    Args:
        video_path (str): The path to the video file.
        output_srt_path (str): The path to save the generated SRT file.
        language (str, optional): The language of the video. Defaults to "en-US".
    """
    chunk_length_ms = 60000  # milliseconds of audio per recognition request
    video = None
    try:
        # All intermediate WAV files live in a private temporary directory so
        # they are removed even when a step fails, and so that two runs in the
        # same working directory cannot overwrite each other's files.
        with tempfile.TemporaryDirectory(prefix="auto_caption_") as work_dir:
            # 1. Extract Audio from Video
            video = mp.VideoFileClip(video_path)
            if video.audio is None:
                raise ValueError(f"No audio track found in {video_path}")
            audio_path = os.path.join(work_dir, "temp_audio.wav")
            video.audio.write_audiofile(audio_path, codec='pcm_s16le')  # PCM required by some libraries

            # 2. Load the audio; it is sliced into chunks lazily below
            audio = AudioSegment.from_wav(audio_path)

            # 3. Transcribe Audio Chunks
            recognizer = sr.Recognizer()
            cues = []  # (start_seconds, end_seconds, text)
            for i, start_ms in enumerate(range(0, len(audio), chunk_length_ms)):
                chunk = audio[start_ms:start_ms + chunk_length_ms]
                chunk_file = os.path.join(work_dir, f"temp_chunk_{i}.wav")
                chunk.export(chunk_file, format="wav")
                with sr.AudioFile(chunk_file) as source:
                    audio_data = recognizer.record(source)
                try:
                    text = recognizer.recognize_google(audio_data, language=language)
                except sr.UnknownValueError:
                    print(f"Chunk {i+1}: Could not understand audio")
                    continue
                except sr.RequestError as e:
                    print(f"Chunk {i+1}: Could not request results from Google Speech Recognition service; {e}")
                    continue
                print(f"Chunk {i+1}: {text}")  # Print the text for debugging
                # len(chunk) is the real chunk duration in ms; the last chunk
                # is usually shorter than chunk_length_ms.
                end_ms = start_ms + len(chunk)
                cues.append((start_ms / 1000.0, end_ms / 1000.0, text))

        # 4. Create SRT File (cues are numbered consecutively from 1)
        with open(output_srt_path, "w", encoding="utf-8") as srt_file:
            for index, (start_time, end_time, text) in enumerate(cues, start=1):
                srt_file.write(f"{index}\n")
                srt_file.write(f"{format_time(start_time)} --> {format_time(end_time)}\n")
                srt_file.write(f"{text}\n\n")
        print(f"Subtitles saved to {output_srt_path}")
    except Exception as e:
        # Top-level boundary: report the failure instead of propagating it.
        print(f"An error occurred: {e}")
    finally:
        # 5. Cleanup: release the video reader on both success and failure.
        if video is not None:
            video.close()
def format_time(seconds):
    """
    Converts seconds to SRT time format (HH:MM:SS,mmm).

    The value is rounded to the nearest millisecond rather than truncated, so
    binary floating-point error (e.g. ``1.001 * 1000 == 1000.9999999999999``)
    does not drop a millisecond. Negative input is clamped to zero because
    SRT timestamps cannot be negative.

    Args:
        seconds (int | float): Offset from the start of the video, in seconds.

    Returns:
        str: The timestamp formatted as ``HH:MM:SS,mmm``.
    """
    total_ms = max(0, round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3600 * 1000)
    minutes, remainder_ms = divmod(remainder_ms, 60 * 1000)
    whole_seconds, milliseconds = divmod(remainder_ms, 1000)
    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}"
if __name__ == '__main__':
    # Example usage: edit the three values below to caption your own video.
    auto_caption(
        video_path="my_video.mp4",           # Replace with your video file
        output_srt_path="my_subtitles.srt",  # Replace with your desired SRT file name
        language="en-US",                    # Change if your video is in a different language
    )
```
👁️ Viewed: 9
Comments