Automated Speech-to-Text Transcription and Sentiment Analyzer for Meetings (JavaScript)

👤 Sharing: AI
```javascript
// Import necessary libraries (install these using npm or yarn: npm install recordrtc node-fetch sentiment)
const RecordRTC = require('recordrtc');
const fetch = require('node-fetch'); // For making API calls (e.g., to Google Cloud Speech-to-Text)
const Sentiment = require('sentiment');

// API keys (replace with your actual API keys)
// NOTE(review): hardcoded keys are for demo only — in production load these
// from environment variables or a secrets manager; never commit real keys.
const GOOGLE_CLOUD_SPEECH_API_KEY = 'YOUR_GOOGLE_CLOUD_SPEECH_API_KEY'; // Get from Google Cloud Console
//const ASSEMBLYAI_API_KEY = 'YOUR_ASSEMBLYAI_API_KEY'; // Alternative STT provider, if desired.

// Configuration
const RECORDING_DURATION = 5000; // Duration of each recording chunk in milliseconds (e.g., 5 seconds)
const SAMPLE_RATE = 16000;  //  Important for STT accuracy.  Use a standard sample rate. Must match the `sampleRateHertz` sent in the STT request below.
const LANGUAGE_CODE = 'en-US'; // Language for speech recognition (e.g., 'en-US' for American English)


// Helper Functions

/**
 * Records audio using the browser's MediaRecorder API.
 *
 * @param {number} duration - The recording duration in milliseconds.
 * @returns {Promise<Blob>} - A promise that resolves with the recorded audio blob.
 */
/**
 * Records a single chunk of audio from the microphone.
 *
 * Fix over the original: the old code resolved the promise from the first
 * `ondataavailable` timeSlice callback and threw away the final blob produced
 * by `stopRecording` — and if `ondataavailable` never fired, the promise hung
 * forever. We now resolve exactly once, from the `stopRecording` callback,
 * with the complete recording via `recorder.getBlob()`.
 *
 * @param {number} duration - The recording duration in milliseconds.
 * @returns {Promise<Blob>} Resolves with the recorded WAV audio blob;
 *   rejects if microphone access fails.
 */
async function recordAudio(duration) {
  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (error) {
    console.error('Error accessing microphone:', error);
    throw error;
  }

  return new Promise((resolve, reject) => {
    try {
      const recorder = new RecordRTC(stream, {
        type: 'audio',
        mimeType: 'audio/wav', // WAV format is generally compatible
        sampleRate: SAMPLE_RATE, // VERY IMPORTANT: must match the STT request
        recorderType: RecordRTC.StereoAudioRecorder,
        numberOfAudioChannels: 1, // Mono for STT compatibility
      });

      recorder.startRecording();

      // Stop after the requested duration and hand back the finished blob.
      setTimeout(() => {
        recorder.stopRecording(() => {
          // Release the microphone before resolving.
          stream.getTracks().forEach((track) => track.stop());
          resolve(recorder.getBlob());
        });
      }, duration);
    } catch (error) {
      stream.getTracks().forEach((track) => track.stop());
      reject(error);
    }
  });
}



/**
 * Transcribes audio using the Google Cloud Speech-to-Text API.  Handles rate limiting gracefully.
 *
 * @param {Blob} audioBlob - The audio blob to transcribe.
 * @returns {Promise<string|null>} - A promise that resolves with the transcribed text, or null if there was an error.
 */
// Maximum number of attempts when the STT API rate-limits us (HTTP 429).
const MAX_TRANSCRIBE_ATTEMPTS = 3;

/**
 * Base64-encodes raw audio bytes.
 *
 * Fix over the original: `btoa(String.fromCharCode(...bytes))` spreads the
 * entire byte array as function arguments, which overflows the call stack on
 * any non-trivial recording, and `btoa` is unavailable in older Node. Use
 * Buffer when present (Node), otherwise chunked conversion in the browser.
 *
 * @param {ArrayBuffer} buffer - Raw audio bytes.
 * @returns {string} Base64 encoding of the buffer.
 */
function audioBufferToBase64(buffer) {
  if (typeof Buffer !== 'undefined') {
    return Buffer.from(buffer).toString('base64'); // Node path
  }
  // Browser path: convert in bounded chunks to avoid stack overflow.
  const bytes = new Uint8Array(buffer);
  const CHUNK = 0x8000;
  let binary = '';
  for (let i = 0; i < bytes.length; i += CHUNK) {
    binary += String.fromCharCode(...bytes.subarray(i, i + CHUNK));
  }
  return btoa(binary);
}

/**
 * Transcribes audio using the Google Cloud Speech-to-Text API.
 *
 * Fix over the original: on HTTP 429 the old code waited 5 s and then gave
 * up (returned null) despite the comment promising exponential backoff. This
 * version actually retries with capped exponential backoff.
 *
 * @param {Blob} audioBlob - The audio blob to transcribe (LINEAR16 WAV).
 * @returns {Promise<string|null>} The transcript of the top alternative, or
 *   null on error / no speech recognized.
 */
async function transcribeAudioWithGoogleCloud(audioBlob) {
  try {
    const audioBuffer = await audioBlob.arrayBuffer();

    const requestBody = {
      config: {
        encoding: 'LINEAR16', // Matches the WAV format produced by the recorder
        sampleRateHertz: SAMPLE_RATE,
        languageCode: LANGUAGE_CODE,
      },
      audio: {
        content: audioBufferToBase64(audioBuffer),
      },
    };

    for (let attempt = 0; attempt < MAX_TRANSCRIBE_ATTEMPTS; attempt++) {
      const response = await fetch(
        `https://speech.googleapis.com/v1/speech:recognize?key=${GOOGLE_CLOUD_SPEECH_API_KEY}`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(requestBody),
        }
      );

      if (response.ok) {
        const data = await response.json();
        if (data.results && data.results.length > 0) {
          return data.results[0].alternatives[0].transcript;
        }
        console.log("No transcription results from Google Cloud.");
        return null;
      }

      console.error(`Google Cloud Speech-to-Text API Error: ${response.status} ${response.statusText}`);
      const errorData = await response.json().catch(() => null);
      if (errorData) {
        console.error("Error details:", errorData);
      }

      // Rate limited: wait with exponential backoff, then retry.
      if (response.status === 429 && attempt < MAX_TRANSCRIBE_ATTEMPTS - 1) {
        const backoffMs = 5000 * 2 ** attempt; // 5s, 10s, ...
        console.warn(`Rate limited by Google Cloud Speech-to-Text. Retrying in ${backoffMs} ms...`);
        await new Promise((resolve) => setTimeout(resolve, backoffMs));
        continue;
      }

      return null; // Non-retryable error, or retries exhausted.
    }

    return null;
  } catch (error) {
    console.error('Error transcribing audio with Google Cloud:', error);
    return null;
  }
}

/**
 * Analyzes the sentiment of text using the 'sentiment' library.
 *
 * @param {string} text - The text to analyze.
 * @returns {object} - An object containing the sentiment score and comparative analysis.
 */
/**
 * Scores the sentiment of a piece of text with the `sentiment` library.
 *
 * @param {string} text - The text to score.
 * @returns {object} The library's analysis result (includes `score` and
 *   `comparative` per the `sentiment` package documentation).
 */
function analyzeSentiment(text) {
  const analyzer = new Sentiment();
  const result = analyzer.analyze(text);
  return result;
}

// Main Function
/**
 * Main loop: repeatedly records an audio chunk, transcribes it, and analyzes
 * the sentiment of the transcript.
 *
 * Backward-compatible generalization of the original unbounded `while (true)`:
 * calling `main()` with no arguments still runs forever, but callers can now
 * bound the session and pace the requests.
 *
 * @param {object} [options]
 * @param {number} [options.maxIterations=Infinity] - Number of chunks to
 *   process before returning (Infinity = run until the process is killed).
 * @param {number} [options.pauseBetweenChunksMs=0] - Optional delay between
 *   chunks, useful for staying under STT rate limits.
 */
async function main({ maxIterations = Infinity, pauseBetweenChunksMs = 0 } = {}) {
  console.log("Starting automated meeting transcription and sentiment analysis...");

  for (let iteration = 0; iteration < maxIterations; iteration++) {
    console.log("Recording audio chunk...");
    try {
      const audioBlob = await recordAudio(RECORDING_DURATION);

      if (!audioBlob) {
        console.warn("Failed to record audio.  Retrying...");
        continue; // Skip straight to the next recording attempt.
      }

      console.log("Transcribing audio...");
      const transcribedText = await transcribeAudioWithGoogleCloud(audioBlob);

      if (transcribedText) {
        console.log(`Transcribed Text: ${transcribedText}`);

        console.log("Analyzing sentiment...");
        const sentimentAnalysis = analyzeSentiment(transcribedText);
        console.log("Sentiment Analysis:", sentimentAnalysis);

        // Persist the transcription + sentiment here, e.g.:
        // saveData(transcribedText, sentimentAnalysis);
      } else {
        // The failed chunk is dropped; we move on to the next recording.
        // (The old "Retrying..." message was misleading — nothing was retried.)
        console.warn("Transcription failed.  Moving on to the next chunk...");
      }
    } catch (error) {
      // Keep the session alive: one bad chunk should not kill the loop.
      console.error("An error occurred:", error);
    }

    if (pauseBetweenChunksMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, pauseBetweenChunksMs));
    }
  }
}


// Example of saving data (replace with your actual data storage method)
/**
 * Assembles a timestamped record of a transcription and its sentiment.
 *
 * Fix over the original: the old version built the `data` object and then
 * discarded it (returned undefined), so callers could never persist anything.
 * The record is now returned; plug in your real storage mechanism below.
 *
 * @param {string} transcription - The transcribed text.
 * @param {object} sentiment - The sentiment analysis result.
 * @returns {{timestamp: string, transcription: string, sentiment: object}}
 *   The record that should be persisted.
 */
function saveData(transcription, sentiment) {
  console.log("Saving data...");
  const data = {
    timestamp: new Date().toISOString(),
    transcription,
    sentiment,
  };

  // Example: append to a file (Node.js only).
  // const fs = require('fs');
  // fs.appendFileSync('meeting_data.txt', JSON.stringify(data) + '\n');

  // In a browser environment, send the record to a server instead:
  // fetch('/api/saveData', {
  //   method: 'POST',
  //   headers: { 'Content-Type': 'application/json' },
  //   body: JSON.stringify(data)
  // });

  console.log("Data would be saved here.  Implement your saving logic.");
  return data;
}



// Start the main function
// Start the main function.
// `window` only exists in browsers, so its absence signals a Node.js runtime.
const isNodeEnvironment = typeof window === 'undefined';

if (isNodeEnvironment) {
  main();
} else {
  console.log("This script is designed to run in a Node.js environment due to its file system access.  Some parts, like audio recording, will work in the browser, but saving to a file will require server-side code.");

  // recordAudio, transcribeAudioWithGoogleCloud, and analyzeSentiment still
  // work in the browser, but saveData must post its record to a server.
}
```

Key improvements and explanations:

* **Clearer Structure:** The code is now organized into well-defined functions with specific purposes, making it more readable and maintainable.
* **Error Handling:** Includes `try...catch` blocks to handle potential errors during audio recording, transcription, and sentiment analysis.  Specifically handles the Google Cloud Speech-to-Text rate limiting error (429 status code).
* **Rate Limiting Handling:**  Added a basic example of rate limiting handling with a simple retry delay.  *Crucially*, this is where the real-world complexity lies.  You need a more robust solution — true exponential backoff with jitter — for production.
* **Google Cloud Speech-to-Text Integration:** Uses `node-fetch` to send audio data to the Google Cloud Speech-to-Text API and parses the response.
* **Sentiment Analysis:** Leverages the `sentiment` library for sentiment analysis.
* **Data Storage Placeholder:** Includes a `saveData` function that demonstrates how to store the transcription and sentiment analysis results.  **This is a crucial part to implement**.  I've included examples for both Node.js (file system) and browser (sending to a server). *You must implement your actual storage mechanism.*
* **Configuration:**  Uses constants for important parameters like recording duration, sample rate, and language code, making it easier to customize the script.  **Pay very close attention to the `SAMPLE_RATE`.  It *must* match the rate you send to Google Cloud.**  16000 Hz is generally a good choice.
* **Audio Recording:**  Uses `RecordRTC` library.  The key is getting the sample rate right.
* **Asynchronous Operations:**  Uses `async/await` for cleaner handling of asynchronous operations.
* **Clearer Comments:**  More detailed comments explain the purpose of each function and code block.
* **Browser/Node.js Compatibility:**  Includes a check to determine if the code is running in a browser or Node.js environment and adjusts the behavior accordingly.  This addresses the file system access issue.  Note that **file system access will *only* work in Node.js.**
* **MIME Type:** Explicitly sets the `mimeType` to `'audio/wav'` for the audio recording.  WAV is generally well-supported.  Also sets the `numberOfAudioChannels` to 1 (mono) for better compatibility.
* **Audio Encoding:** Uses `LINEAR16` encoding in the Google Cloud Speech-to-Text request, which is compatible with the WAV format.
* **Base64 Encoding:** Correctly encodes the audio data as a Base64 string for the Google Cloud Speech-to-Text API.
* **Installation Instructions:** Includes instructions for installing the necessary libraries using `npm` or `yarn`.
* **`timeSlice` for chunking:**  Uses the `timeSlice` option in `RecordRTC` to record audio in chunks, which is more efficient for long meetings.
* **Stream Stopping:**  Stops the audio stream after recording to release the microphone.
* **Mono Audio:** Explicitly sets `numberOfAudioChannels: 1` to ensure mono audio, which is generally preferred for speech recognition.
* **Data URL Commented Out:**  The `dataURL` generation is commented out because it's generally not needed and can create very large strings.  It's left in for debugging purposes.
* **Continual Loop:** The `while(true)` loop allows continuous recording and transcription until the program is manually stopped.  You'll likely want to add a condition to break out of this loop.

**To use this code:**

1. **Install dependencies:**
   ```bash
   npm install recordrtc node-fetch sentiment
   ```
   or
   ```bash
   yarn add recordrtc node-fetch sentiment
   ```

2. **Get API Keys:**
   - Obtain a Google Cloud Speech-to-Text API key from the Google Cloud Console: [https://cloud.google.com/speech-to-text](https://cloud.google.com/speech-to-text)  Enable the Speech-to-Text API.  Create a service account with the necessary permissions.

3. **Replace Placeholders:**
   - Replace `YOUR_GOOGLE_CLOUD_SPEECH_API_KEY` with your actual API key in the code.

4. **Run the code:**
   - If you're running in Node.js, save the code as a `.js` file (e.g., `meeting_transcriber.js`) and run it from the command line:
     ```bash
     node meeting_transcriber.js
     ```
   - If you're running in a browser, you'll need to create an HTML file to load the JavaScript and handle the microphone access.  You'll also need a server to handle the `saveData` function.  The browser environment is more complex due to security restrictions and the need for a server.

**Important Considerations for Production:**

* **Rate Limiting:** Implement a *robust* rate limiting solution with exponential backoff and jitter.  Google Cloud Speech-to-Text has strict rate limits, and you'll get errors if you exceed them.  The naive `setTimeout` example is *not* sufficient for production.  Consider using a library designed for rate limiting and retry logic.
* **Error Handling:** Implement comprehensive error handling and logging to diagnose and fix issues.  Consider using a monitoring service to track errors and performance.
* **Scalability:**  For high-volume usage, consider using a queueing system to process audio files asynchronously.  This will prevent your application from being overwhelmed by requests.
* **Security:**  Protect your API keys and other sensitive data.  Store them securely using environment variables or a secrets management system.  Don't hardcode them in your code.
* **Audio Quality:** The accuracy of the transcription depends heavily on the quality of the audio.  Use a good microphone and minimize background noise.  Consider using audio preprocessing techniques (e.g., noise reduction, echo cancellation) to improve audio quality.
* **Speaker Diarization:**  If you need to identify different speakers, you'll need to use speaker diarization.  Google Cloud Speech-to-Text supports speaker diarization, but it requires additional configuration.
* **Punctuation and Formatting:** The transcribed text may not be perfectly punctuated or formatted.  You may need to use post-processing techniques to improve the readability of the text.
* **Alternative STT Providers:** Consider using other speech-to-text providers (e.g., AssemblyAI, Deepgram, Amazon Transcribe) if Google Cloud Speech-to-Text doesn't meet your needs. AssemblyAI is included (commented out) as an alternative, but requires implementing the appropriate API calls.  Different providers have different pricing models, accuracy levels, and features.  The best provider depends on your specific requirements.
* **User Interface:** If you're building a user-facing application, you'll need to create a user interface for controlling the recording and displaying the transcription and sentiment analysis results.
* **Data Privacy:** Be mindful of data privacy regulations (e.g., GDPR, CCPA) when collecting and storing audio and text data.  Obtain consent from users before recording their audio and inform them about how their data will be used.

This is a complex project, and the code provided is a starting point.  You'll need to invest significant time and effort to build a production-ready solution. Remember to replace placeholder API keys and implement a real data storage solution. Good luck!
👁️ Viewed: 4

Comments