AI-powered Auto Captioning Python, AI

👤 Sharing: AI
```python
import speech_recognition as sr
import moviepy.editor as mp
from pydub import AudioSegment
import os

def auto_caption(video_path, output_srt_path, language="en-US", *,
                 chunk_length_ms=60000, caption_duration_ms=5000):
    """
    Automatically generates subtitles for a video using speech recognition.

    The audio track is extracted to a WAV file, split into fixed-length
    chunks, and each chunk is sent to ``Recognizer.recognize_google``. One SRT
    cue is written per chunk, starting at the chunk's start time. Chunks that
    cannot be transcribed still get a cue, with empty text.

    All intermediate files live in a private temporary directory that is
    removed when the function finishes, whether it succeeds or fails.

    Errors are reported with ``print`` rather than raised, so the function
    always returns ``None``.

    Args:
        video_path (str): The path to the video file.
        output_srt_path (str): The path to save the generated SRT file.
        language (str, optional): The language of the video. Defaults to "en-US".
        chunk_length_ms (int, optional): Length of each transcription chunk in
            milliseconds. Defaults to 60000.
        caption_duration_ms (int, optional): How long each cue stays on screen,
            in milliseconds. Clamped to the length of the chunk it belongs to,
            so a cue never overlaps the next one or runs past the end of the
            audio. Defaults to 5000.
    """
    # Imported locally so this function does not depend on the module's import block.
    import tempfile

    video = None
    try:
        captions = []  # (start_ms, end_ms, text) for each chunk, in order

        # A private temp directory avoids name clashes between concurrent runs
        # and guarantees cleanup even when a step below raises.
        with tempfile.TemporaryDirectory(prefix="auto_caption_") as temp_dir:
            # 1. Extract Audio from Video
            video = mp.VideoFileClip(video_path)
            if video.audio is None:
                raise ValueError(f"No audio track found in {video_path}")
            audio_path = os.path.join(temp_dir, "temp_audio.wav")
            video.audio.write_audiofile(audio_path, codec='pcm_s16le')  # PCM required by some libraries

            # 2. Split Audio into Chunks (for better transcription)
            audio = AudioSegment.from_wav(audio_path)
            recognizer = sr.Recognizer()

            # 3. Transcribe Audio Chunks
            for i, start_ms in enumerate(range(0, len(audio), chunk_length_ms)):
                chunk = audio[start_ms:start_ms + chunk_length_ms]
                chunk_file = os.path.join(temp_dir, f"temp_chunk_{i}.wav")
                # export() returns the open output file handle; close it explicitly.
                chunk.export(chunk_file, format="wav").close()

                with sr.AudioFile(chunk_file) as source:
                    audio_data = recognizer.record(source)
                # The chunk is in memory now; delete it so disk usage stays bounded.
                os.remove(chunk_file)

                try:
                    text = recognizer.recognize_google(audio_data, language=language)
                    print(f"Chunk {i+1}: {text}")  # Print the text for debugging
                except sr.UnknownValueError:
                    print(f"Chunk {i+1}: Could not understand audio")
                    text = ""  # Keep a blank cue so numbering follows the chunks
                except sr.RequestError as e:
                    print(f"Chunk {i+1}: Could not request results from Google Speech Recognition service; {e}")
                    text = ""  # Keep a blank cue so numbering follows the chunks

                # Never show a cue for longer than the chunk it was taken from.
                end_ms = start_ms + min(caption_duration_ms, len(chunk))
                captions.append((start_ms, end_ms, text))

        # 4. Create SRT File
        with open(output_srt_path, "w", encoding="utf-8") as srt_file:
            for index, (start_ms, end_ms, text) in enumerate(captions, start=1):
                srt_file.write(f"{index}\n")
                srt_file.write(f"{format_time(start_ms / 1000.0)} --> {format_time(end_ms / 1000.0)}\n")
                srt_file.write(f"{text}\n\n")

        print(f"Subtitles saved to {output_srt_path}")

    except Exception as e:
        # Best-effort boundary: report the failure instead of propagating it.
        print(f"An error occurred: {e}")
    finally:
        # 5. Cleanup: release the video reader on both success and failure.
        if video is not None:
            video.close()


def format_time(seconds):
    """
    Converts seconds to SRT time format (HH:MM:SS,mmm).

    The value is rounded to the nearest millisecond rather than truncated, so
    floating-point error (e.g. ``1.001 * 1000 == 1000.9999999999999``) does not
    drop a millisecond. Negative values are clamped to zero, because SRT
    timestamps cannot be negative.

    Args:
        seconds (int | float): Time offset in seconds.

    Returns:
        str: The timestamp as ``HH:MM:SS,mmm``. Hours are zero-padded to at
        least two digits and grow beyond that for very long offsets.
    """
    total_ms = max(0, round(seconds * 1000))
    hours, total_ms = divmod(total_ms, 3600 * 1000)
    minutes, total_ms = divmod(total_ms, 60 * 1000)
    whole_seconds, milliseconds = divmod(total_ms, 1000)
    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}"




if __name__ == '__main__':
    # Example usage: edit the three values below, then run this file as a script.
    example_args = (
        "my_video.mp4",      # Replace with your video file
        "my_subtitles.srt",  # Replace with your desired SRT file name
        "en-US",             # Change if your video is in a different language
    )

    auto_caption(*example_args)
```
👁️ Viewed: 9

Comments