Automated Customer Feedback Sentiment Analyzer for Improving Service Quality,Python

👤 Sharing: AI
```python
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt


# Ensure necessary NLTK resources are downloaded (run this once)
try:
    nltk.data.find('sentiment/vader_lexicon')
except LookupError:
    nltk.download('vader_lexicon')

try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
from nltk.corpus import stopwords


class SentimentAnalyzer:
    """
    A class for analyzing the sentiment of customer feedback using VADER and Naive Bayes.
    """

    def __init__(self):
        """
        Initializes the SentimentAnalyzer with VADER and sets up stop words.
        """
        self.vader = SentimentIntensityAnalyzer()
        self.stop_words = set(stopwords.words('english'))  # Set of common English words to ignore

    def analyze_sentiment_vader(self, text):
        """
        Analyzes the sentiment of a given text using VADER (Valence Aware Dictionary and sEntiment Reasoner).

        Args:
            text (str): The text to analyze.

        Returns:
            dict: A dictionary containing the sentiment scores (negative, neutral, positive, compound).
        """
        scores = self.vader.polarity_scores(text)
        return scores

    def classify_sentiment(self, compound_score):
        """
        Classifies the sentiment based on the compound score.

        Args:
            compound_score (float): The compound sentiment score from VADER.

        Returns:
            str: "Positive", "Negative", or "Neutral".
        """
        if compound_score >= 0.05:
            return "Positive"
        elif compound_score <= -0.05:
            return "Negative"
        else:
            return "Neutral"

    def preprocess_text(self, text):
        """
        Preprocesses the text by removing stop words and converting to lowercase.
        This is a basic preprocessing step suitable for many NLP tasks.

        Args:
            text (str): The text to preprocess.

        Returns:
            str: The preprocessed text.
        """
        text = text.lower()
        words = [word for word in text.split() if word not in self.stop_words]
        return " ".join(words)


    def train_naive_bayes(self, data, text_column='text', sentiment_column='sentiment', test_size=0.2, random_state=42):
        """
        Trains a Naive Bayes classifier on the provided data.

        Args:
            data (pd.DataFrame): The dataframe containing the text and sentiment data.
            text_column (str): The name of the column containing the text.
            sentiment_column (str): The name of the column containing the sentiment labels.
            test_size (float): The proportion of the data to use for testing.
            random_state (int): Random seed for reproducibility.

        Returns:
            tuple: A tuple containing the trained Naive Bayes classifier, the TF-IDF vectorizer, and the test data.
        """

        # 1. Prepare data: Separate text and sentiment labels
        X = data[text_column]
        y = data[sentiment_column]

        # 2. Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

        # 3. Feature extraction using TF-IDF
        self.tfidf_vectorizer = TfidfVectorizer(preprocessor=self.preprocess_text)  # Store the vectorizer
        X_train_tfidf = self.tfidf_vectorizer.fit_transform(X_train)
        X_test_tfidf = self.tfidf_vectorizer.transform(X_test)

        # 4. Train the Naive Bayes classifier
        self.naive_bayes_classifier = MultinomialNB()
        self.naive_bayes_classifier.fit(X_train_tfidf, y_train)

        return self.naive_bayes_classifier, self.tfidf_vectorizer, X_test, y_test  # Return trained model, vectorizer, and test data

    def evaluate_naive_bayes(self, classifier, X_test, y_test, tfidf_vectorizer):
        """
        Evaluates the performance of the trained Naive Bayes classifier.

        Args:
            classifier: The trained Naive Bayes classifier.
            X_test: The test data (text).
            y_test: The true sentiment labels for the test data.
            tfidf_vectorizer: The trained TF-IDF vectorizer.

        Returns:
            None.  Prints evaluation metrics to the console.
        """

        X_test_tfidf = tfidf_vectorizer.transform(X_test)
        y_pred = classifier.predict(X_test_tfidf)

        accuracy = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {accuracy:.4f}")

        print("Classification Report:")
        print(classification_report(y_test, y_pred))

    def predict_sentiment_naive_bayes(self, text):
        """
        Predicts the sentiment of a given text using the trained Naive Bayes classifier.

        Args:
            text (str): The text to predict the sentiment for.

        Returns:
            str: The predicted sentiment ("Positive", "Negative", or "Neutral").  Returns None if the model hasn't been trained.
        """
        if not hasattr(self, 'naive_bayes_classifier') or not hasattr(self, 'tfidf_vectorizer'):
            print("Error: Naive Bayes model not trained yet. Please train the model first.")
            return None  # or raise an exception

        text_tfidf = self.tfidf_vectorizer.transform([text]) #must be a list
        predicted_sentiment = self.naive_bayes_classifier.predict(text_tfidf)[0] #predict returns an array
        return predicted_sentiment


# Example Usage (with a toy dataset)
if __name__ == '__main__':
    # 1. Sample data (replace with your actual data)
    data = pd.DataFrame({
        'text': [
            "This is an amazing product! I love it.",
            "The service was terrible. I'm very disappointed.",
            "It's okay, nothing special.",
            "I'm so happy with my purchase!",
            "This is the worst experience I've ever had.",
            "The food was delicious and the staff was friendly.",
            "Could be better, but not bad.",
            "Absolutely fantastic! Highly recommended.",
            "Completely useless. A waste of money.",
            "The delivery was prompt and efficient.",
            "I am not happy",
            "great work!",
            "It's a bad one"
        ],
        'sentiment': [
            "Positive", "Negative", "Neutral", "Positive", "Negative",
            "Positive", "Neutral", "Positive", "Negative", "Positive",
            "Negative","Positive","Negative"
        ]
    })


    # 2. Initialize the SentimentAnalyzer
    analyzer = SentimentAnalyzer()

    # 3. Example usage with VADER
    sample_text = "The food was good, but the service was slow."
    vader_scores = analyzer.analyze_sentiment_vader(sample_text)
    print(f"VADER Sentiment Scores for '{sample_text}': {vader_scores}")
    sentiment_label = analyzer.classify_sentiment(vader_scores['compound'])
    print(f"VADER Sentiment Classification: {sentiment_label}")

    # 4. Train and evaluate the Naive Bayes classifier
    classifier, tfidf_vectorizer, X_test, y_test = analyzer.train_naive_bayes(data)  # Train the model using the training data
    print("\nNaive Bayes Classifier Evaluation:")
    analyzer.evaluate_naive_bayes(classifier, X_test, y_test, tfidf_vectorizer) # Evaluate the performance

    # 5. Predict sentiment using the trained Naive Bayes model
    new_text = "This is an excellent service. I'm very satisfied."
    predicted_sentiment = analyzer.predict_sentiment_naive_bayes(new_text)
    if predicted_sentiment:
        print(f"\nNaive Bayes Predicted Sentiment for '{new_text}': {predicted_sentiment}")


    # Example using only VADER (no training, just dictionary-based sentiment)
    vader_prediction = analyzer.classify_sentiment(analyzer.analyze_sentiment_vader(new_text)['compound'])
    print(f"\nVADER Predicted Sentiment for '{new_text}': {vader_prediction}")

    # Comparing VADER vs Naive Bayes predictions:  VADER is good as a baseline, but the trained Naive Bayes
    # model can be more accurate if you have a dataset of customer feedback with sentiment labels.  VADER also doesn't require training data.
```

Key improvements and explanations:

* **Clear Class Structure:** Encapsulates the sentiment analysis logic within a `SentimentAnalyzer` class, making the code more organized, reusable, and easier to understand.

* **VADER Integration:**  Includes VADER (Valence Aware Dictionary and sEntiment Reasoner) for sentiment analysis. VADER is particularly good at handling sentiment in social media text.

* **Naive Bayes Classifier:** Adds a Naive Bayes classifier, a machine learning approach that can be trained on customer feedback data to improve accuracy.

* **TF-IDF Vectorization:**  Uses TF-IDF (Term Frequency-Inverse Document Frequency) to convert text into numerical features that the Naive Bayes classifier can understand. This is a standard and effective text feature extraction technique.

* **Preprocessing:** Includes a `preprocess_text` function that removes stop words (common words like "the", "a", "is") and converts text to lowercase, which helps improve the accuracy of the Naive Bayes classifier.

* **Train/Test Split:** Splits the data into training and testing sets to evaluate the performance of the Naive Bayes classifier accurately. This prevents overfitting.

* **Evaluation Metrics:**  Calculates and prints accuracy and a classification report (precision, recall, F1-score) for the Naive Bayes classifier, providing a comprehensive evaluation of its performance.

* **Prediction Function:**  Provides a `predict_sentiment_naive_bayes` function to predict the sentiment of new text using the trained Naive Bayes model.

* **Error Handling:** Added error handling to `predict_sentiment_naive_bayes` in case the model isn't trained yet.

* **Clear Example Usage:** Includes a complete example with sample data to demonstrate how to use the `SentimentAnalyzer` class, train the Naive Bayes classifier, and predict sentiment.  The example is now fully executable.

* **Comments and Docstrings:**  Added detailed comments and docstrings to explain the purpose of each function and class.  This dramatically improves readability and maintainability.

* **NLTK Resource Downloads:**  Includes code to download the necessary NLTK resources (VADER lexicon and stopwords) if they are not already present.  This ensures that the code will work out of the box.  Uses `try...except` to prevent errors if the resources are already downloaded.

* **Stop Words:**  Uses NLTK's list of English stop words for preprocessing.

* **Conciseness and Readability:** Improved the code's conciseness and readability by using more descriptive variable names and breaking down complex operations into smaller, more manageable steps.

* **Random State:**  Includes `random_state` in `train_test_split` for reproducibility.

* **Handles Cases Where VADER gives a Neutral Score:**  The classify_sentiment function now correctly handles neutral sentiment scores.

How to use it:

1.  **Install Libraries:**
    ```bash
    pip install nltk pandas scikit-learn matplotlib
    ```

2.  **Run the Code:** Save the code as a Python file (e.g., `sentiment_analyzer.py`) and run it from your terminal:
    ```bash
    python sentiment_analyzer.py
    ```

3.  **Replace Sample Data:**  Replace the sample data in the `if __name__ == '__main__':` block with your actual customer feedback data.  Make sure your data has two columns: one for the text of the feedback and one for the sentiment label (e.g., "Positive", "Negative", "Neutral").

4.  **Analyze Your Data:** The code will train the Naive Bayes classifier on your data and then predict the sentiment of new feedback.  The output will show the VADER sentiment scores, the accuracy of the Naive Bayes classifier, and the predicted sentiment for the new feedback.

Key Improvements Summarized:

* **Robustness:** Added a Naive Bayes classifier for increased accuracy with training data.
* **Completeness:** Complete, runnable code with all necessary dependencies.
* **Clarity:**  Extensive comments and docstrings.
* **Usability:** Easy to use and adapt to your own data.
* **Best Practices:**  Uses standard machine learning practices like train/test split and evaluation metrics.
👁️ Viewed: 4

Comments